From 301e7ee1dec513e5aca12d01c819a1f762918d0a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 22 Jul 2019 16:21:05 +0200 Subject: [PATCH 1/8] Revert "iommu/vt-d: Consolidate domain_init() to avoid duplication" This reverts commit 123b2ffc376e1b3e9e015c75175b61e88a8b8518. This commit reportedly caused boot failures on some systems and needs to be reverted for now. Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 123 +++++++++++++++++++++++++----------- 1 file changed, 87 insertions(+), 36 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index ac4172c02244..441781d12553 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1833,6 +1833,63 @@ static inline int guestwidth_to_adjustwidth(int gaw) return agaw; } +static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, + int guest_width) +{ + int adjust_width, agaw; + unsigned long sagaw; + int err; + + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); + + err = init_iova_flush_queue(&domain->iovad, + iommu_flush_iova, iova_entry_free); + if (err) + return err; + + domain_reserve_special_ranges(domain); + + /* calculate AGAW */ + if (guest_width > cap_mgaw(iommu->cap)) + guest_width = cap_mgaw(iommu->cap); + domain->gaw = guest_width; + adjust_width = guestwidth_to_adjustwidth(guest_width); + agaw = width_to_agaw(adjust_width); + sagaw = cap_sagaw(iommu->cap); + if (!test_bit(agaw, &sagaw)) { + /* hardware doesn't support it, choose a bigger one */ + pr_debug("Hardware doesn't support agaw %d\n", agaw); + agaw = find_next_bit(&sagaw, 5, agaw); + if (agaw >= 5) + return -ENODEV; + } + domain->agaw = agaw; + + if (ecap_coherent(iommu->ecap)) + domain->iommu_coherency = 1; + else + domain->iommu_coherency = 0; + + if (ecap_sc_support(iommu->ecap)) + domain->iommu_snooping = 1; + else + domain->iommu_snooping = 0; + + if (intel_iommu_superpage) + domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); + else + domain->iommu_superpage = 0; + + domain->nid = iommu->node; + + /* always allocate the top pgd */ + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); + if (!domain->pgd) + return -ENOMEM; + __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); + return 0; +} + static void domain_exit(struct dmar_domain *domain) { struct page *freelist; @@ -2513,31 +2570,6 @@ static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) return 0; } -static int domain_init(struct dmar_domain *domain, int guest_width) -{ - int adjust_width; - - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - domain_reserve_special_ranges(domain); - - /* calculate AGAW */ - domain->gaw = guest_width; - adjust_width = guestwidth_to_adjustwidth(guest_width); - domain->agaw = width_to_agaw(adjust_width); - - domain->iommu_coherency = 0; - domain->iommu_snooping = 0; - domain->iommu_superpage = 0; - domain->max_addr = 0; - - /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); - if (!domain->pgd) - return -ENOMEM; - domain_flush_cache(domain, domain->pgd, PAGE_SIZE); - return 0; -} - static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) { struct device_domain_info *info; @@ -2575,19 +2607,11 @@ static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) domain = alloc_domain(0); if (!domain) return NULL; - - if (domain_init(domain, gaw)) { + if (domain_init(domain, iommu, gaw)) { domain_exit(domain); return NULL; } - if (init_iova_flush_queue(&domain->iovad, - iommu_flush_iova, - iova_entry_free)) { - pr_warn("iova flush queue initialization failed\n"); - intel_iommu_strict = 1; - } - out: return domain; } @@ -2692,6 +2716,8 @@ static int domain_prepare_identity_map(struct device *dev, return iommu_domain_identity_map(domain, start, end); } +static int md_domain_init(struct dmar_domain *domain, int guest_width); + static int __init si_domain_init(int hw) { struct dmar_rmrr_unit *rmrr; @@ -2702,7 +2728,7 @@ static int __init si_domain_init(int hw) if (!si_domain) return -EFAULT; - if (domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { domain_exit(si_domain); return -EFAULT; } @@ -4829,6 +4855,31 @@ static void dmar_remove_one_dev_info(struct device *dev) spin_unlock_irqrestore(&device_domain_lock, flags); } +static int md_domain_init(struct dmar_domain *domain, int guest_width) +{ + int adjust_width; + + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); + domain_reserve_special_ranges(domain); + + /* calculate AGAW */ + domain->gaw = guest_width; + adjust_width = guestwidth_to_adjustwidth(guest_width); + domain->agaw = width_to_agaw(adjust_width); + + domain->iommu_coherency = 0; + domain->iommu_snooping = 0; + domain->iommu_superpage = 0; + domain->max_addr = 0; + + /* always allocate the top pgd */ + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); + if (!domain->pgd) + return -ENOMEM; + domain_flush_cache(domain, domain->pgd, PAGE_SIZE); + return 0; +} + static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; @@ -4843,7 +4894,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) pr_err("Can't allocate dmar_domain\n"); return NULL; } - if (domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { pr_err("Domain initialization failed\n"); domain_exit(dmar_domain); return NULL; From 557529494d79f3f1fadd486dd18d2de0b19be4da Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 9 Jul 2019 13:22:45 +0800 Subject: [PATCH 2/8] iommu/vt-d: Avoid duplicated pci dma alias consideration As we have abandoned the home-made lazy domain allocation and delegated the DMA domain life cycle up to the default domain mechanism defined in the generic iommu layer, we needn't consider pci alias anymore when mapping/unmapping the context entries. Without this fix, we see kernel NULL pointer dereference during pci device hot-plug test. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Fixes: fa954e6831789 ("iommu/vt-d: Delegate the dma domain to upper layer") Signed-off-by: Lu Baolu Reported-and-tested-by: Xu Pengfei Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 55 ++----------------------------------- 1 file changed, 2 insertions(+), 53 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 441781d12553..9b1d62d03370 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -339,8 +339,6 @@ static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); static void dmar_remove_one_dev_info(struct device *dev); static void __dmar_remove_one_dev_info(struct device_domain_info *info); -static void domain_context_clear(struct intel_iommu *iommu, - struct device *dev); static int domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); static bool device_is_rmrr_locked(struct device *dev); @@ -2105,26 +2103,9 @@ out_unlock: return ret; } -struct domain_context_mapping_data { - struct dmar_domain *domain; - struct intel_iommu *iommu; - struct pasid_table *table; -}; - -static int domain_context_mapping_cb(struct pci_dev *pdev, - u16 alias, void *opaque) -{ - struct domain_context_mapping_data *data = opaque; - - return domain_context_mapping_one(data->domain, data->iommu, - data->table, PCI_BUS_NUM(alias), - alias & 0xff); -} - static int domain_context_mapping(struct dmar_domain *domain, struct device *dev) { - struct domain_context_mapping_data data; struct pasid_table *table; struct intel_iommu *iommu; u8 bus, devfn; @@ -2134,17 +2115,7 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev) return -ENODEV; table = intel_pasid_get_table(dev); - - if (!dev_is_pci(dev)) - return domain_context_mapping_one(domain, iommu, table, - bus, devfn); - - data.domain = domain; - data.iommu = iommu; - data.table = table; - - return pci_for_each_dma_alias(to_pci_dev(dev), - &domain_context_mapping_cb, &data); + return domain_context_mapping_one(domain, iommu, table, bus, devfn); } static int domain_context_mapped_cb(struct pci_dev *pdev, @@ -4784,28 +4755,6 @@ out_free_dmar: return ret; } -static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque) -{ - struct intel_iommu *iommu = opaque; - - domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff); - return 0; -} - -/* - * NB - intel-iommu lacks any sort of reference counting for the users of - * dependent devices. If multiple endpoints have intersecting dependent - * devices, unbinding the driver from any one of them will possibly leave - * the others unable to operate. - */ -static void domain_context_clear(struct intel_iommu *iommu, struct device *dev) -{ - if (!iommu || !dev || !dev_is_pci(dev)) - return; - - pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu); -} - static void __dmar_remove_one_dev_info(struct device_domain_info *info) { struct dmar_domain *domain; @@ -4826,7 +4775,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) PASID_RID2PASID); iommu_disable_dev_iotlb(info); - domain_context_clear(iommu, info->dev); + domain_context_clear_one(iommu, info->bus, info->devfn); intel_pasid_free_table(info->dev); } From effa467870c7612012885df4e246bdb8ffd8e44c Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 16 Jul 2019 22:38:05 +0100 Subject: [PATCH 3/8] iommu/vt-d: Don't queue_iova() if there is no flush queue Intel VT-d driver was reworked to use common deferred flushing implementation. Previously there was one global per-cpu flush queue, afterwards - one per domain. Before deferring a flush, the queue should be allocated and initialized. Currently only domains with IOMMU_DOMAIN_DMA type initialize their flush queue. It's probably worth to init it for static or unmanaged domains too, but it may be arguable - I'm leaving it to iommu folks. Prevent queuing an iova flush if the domain doesn't have a queue. The defensive check seems to be worth to keep even if queue would be initialized for all kinds of domains. And is easy backportable. On 4.19.43 stable kernel it has a user-visible effect: previously for devices in si domain there were crashes, on sata devices: BUG: spinlock bad magic on CPU#6, swapper/0/1 lock: 0xffff88844f582008, .magic: 00000000, .owner: /-1, .owner_cpu: 0 CPU: 6 PID: 1 Comm: swapper/0 Not tainted 4.19.43 #1 Call Trace: dump_stack+0x61/0x7e spin_bug+0x9d/0xa3 do_raw_spin_lock+0x22/0x8e _raw_spin_lock_irqsave+0x32/0x3a queue_iova+0x45/0x115 intel_unmap+0x107/0x113 intel_unmap_sg+0x6b/0x76 __ata_qc_complete+0x7f/0x103 ata_qc_complete+0x9b/0x26a ata_qc_complete_multiple+0xd0/0xe3 ahci_handle_port_interrupt+0x3ee/0x48a ahci_handle_port_intr+0x73/0xa9 ahci_single_level_irq_intr+0x40/0x60 __handle_irq_event_percpu+0x7f/0x19a handle_irq_event_percpu+0x32/0x72 handle_irq_event+0x38/0x56 handle_edge_irq+0x102/0x121 handle_irq+0x147/0x15c do_IRQ+0x66/0xf2 common_interrupt+0xf/0xf RIP: 0010:__do_softirq+0x8c/0x2df The same for usb devices that use ehci-pci: BUG: spinlock bad magic on CPU#0, swapper/0/1 lock: 0xffff88844f402008, .magic: 00000000, .owner: /-1, .owner_cpu: 0 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.43 #4 Call Trace: dump_stack+0x61/0x7e spin_bug+0x9d/0xa3 do_raw_spin_lock+0x22/0x8e _raw_spin_lock_irqsave+0x32/0x3a queue_iova+0x77/0x145 intel_unmap+0x107/0x113 intel_unmap_page+0xe/0x10 usb_hcd_unmap_urb_setup_for_dma+0x53/0x9d usb_hcd_unmap_urb_for_dma+0x17/0x100 unmap_urb_for_dma+0x22/0x24 __usb_hcd_giveback_urb+0x51/0xc3 usb_giveback_urb_bh+0x97/0xde tasklet_action_common.isra.4+0x5f/0xa1 tasklet_action+0x2d/0x30 __do_softirq+0x138/0x2df irq_exit+0x7d/0x8b smp_apic_timer_interrupt+0x10f/0x151 apic_timer_interrupt+0xf/0x20 RIP: 0010:_raw_spin_unlock_irqrestore+0x17/0x39 Cc: David Woodhouse Cc: Joerg Roedel Cc: Lu Baolu Cc: iommu@lists.linux-foundation.org Cc: # 4.14+ Fixes: 13cf01744608 ("iommu/vt-d: Make use of iova deferred flushing") Signed-off-by: Dmitry Safonov Reviewed-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 3 ++- drivers/iommu/iova.c | 18 ++++++++++++++---- include/linux/iova.h | 6 ++++++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 9b1d62d03370..72c6d647bec9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3561,7 +3561,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) freelist = domain_unmap(domain, start_pfn, last_pfn); - if (intel_iommu_strict || (pdev && pdev->untrusted)) { + if (intel_iommu_strict || (pdev && pdev->untrusted) || + !has_iova_flush_queue(&domain->iovad)) { iommu_flush_iotlb_psi(iommu, domain, start_pfn, nrpages, !freelist, 0); /* free iova */ diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index d499b2621239..8413ae54904a 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -54,9 +54,14 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, } EXPORT_SYMBOL_GPL(init_iova_domain); +bool has_iova_flush_queue(struct iova_domain *iovad) +{ + return !!iovad->fq; +} + static void free_iova_flush_queue(struct iova_domain *iovad) { - if (!iovad->fq) + if (!has_iova_flush_queue(iovad)) return; if (timer_pending(&iovad->fq_timer)) @@ -74,13 +79,14 @@ static void free_iova_flush_queue(struct iova_domain *iovad) int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) { + struct iova_fq __percpu *queue; int cpu; atomic64_set(&iovad->fq_flush_start_cnt, 0); atomic64_set(&iovad->fq_flush_finish_cnt, 0); - iovad->fq = alloc_percpu(struct iova_fq); - if (!iovad->fq) + queue = alloc_percpu(struct iova_fq); + if (!queue) return -ENOMEM; iovad->flush_cb = flush_cb; @@ -89,13 +95,17 @@ int init_iova_flush_queue(struct iova_domain *iovad, for_each_possible_cpu(cpu) { struct iova_fq *fq; - fq = per_cpu_ptr(iovad->fq, cpu); + fq = per_cpu_ptr(queue, cpu); fq->head = 0; fq->tail = 0; spin_lock_init(&fq->lock); } + smp_wmb(); + + iovad->fq = queue; + timer_setup(&iovad->fq_timer, fq_flush_timeout, 0); atomic_set(&iovad->fq_timer_on, 0); diff --git a/include/linux/iova.h b/include/linux/iova.h index 781b96ac706f..cd0f1de901a8 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -155,6 +155,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); +bool has_iova_flush_queue(struct iova_domain *iovad); int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); @@ -235,6 +236,11 @@ static inline void init_iova_domain(struct iova_domain *iovad, { } +bool has_iova_flush_queue(struct iova_domain *iovad) +{ + return false; +} + static inline int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) From 3ee9eca760e7d0b68c55813243de66bbb499dc3b Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 16 Jul 2019 22:38:06 +0100 Subject: [PATCH 4/8] iommu/vt-d: Check if domain->pgd was allocated There is a couple of places where on domain_init() failure domain_exit() is called. While currently domain_init() can fail only if alloc_pgtable_page() has failed. Make domain_exit() check if domain->pgd present, before calling domain_unmap(), as it theoretically should crash on clearing pte entries in dma_pte_clear_level(). Cc: David Woodhouse Cc: Joerg Roedel Cc: Lu Baolu Cc: iommu@lists.linux-foundation.org Signed-off-by: Dmitry Safonov Reviewed-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 72c6d647bec9..bdaed2da8a55 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1890,7 +1890,6 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, static void domain_exit(struct dmar_domain *domain) { - struct page *freelist; /* Remove associated devices and clear attached or cached domains */ domain_remove_dev_info(domain); @@ -1898,9 +1897,12 @@ static void domain_exit(struct dmar_domain *domain) /* destroy iovas */ put_iova_domain(&domain->iovad); - freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); + if (domain->pgd) { + struct page *freelist; - dma_free_pagelist(freelist); + freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); + dma_free_pagelist(freelist); + } free_domain_mem(domain); } From 9eed17d37c77171cf5ffb95c4257f87df3cd4c8f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 20 Jul 2019 19:08:48 +0100 Subject: [PATCH 5/8] iommu/iova: Remove stale cached32_node Since the cached32_node is allowed to be advanced above dma_32bit_pfn (to provide a shortcut into the limited range), we need to be careful to remove the to be freed node if it is the cached32_node. [ 48.477773] BUG: KASAN: use-after-free in __cached_rbnode_delete_update+0x68/0x110 [ 48.477812] Read of size 8 at addr ffff88870fc19020 by task kworker/u8:1/37 [ 48.477843] [ 48.477879] CPU: 1 PID: 37 Comm: kworker/u8:1 Tainted: G U 5.2.0+ #735 [ 48.477915] Hardware name: Intel Corporation NUC7i5BNK/NUC7i5BNB, BIOS BNKBL357.86A.0052.2017.0918.1346 09/18/2017 [ 48.478047] Workqueue: i915 __i915_gem_free_work [i915] [ 48.478075] Call Trace: [ 48.478111] dump_stack+0x5b/0x90 [ 48.478137] print_address_description+0x67/0x237 [ 48.478178] ? __cached_rbnode_delete_update+0x68/0x110 [ 48.478212] __kasan_report.cold.3+0x1c/0x38 [ 48.478240] ? __cached_rbnode_delete_update+0x68/0x110 [ 48.478280] ? __cached_rbnode_delete_update+0x68/0x110 [ 48.478308] __cached_rbnode_delete_update+0x68/0x110 [ 48.478344] private_free_iova+0x2b/0x60 [ 48.478378] iova_magazine_free_pfns+0x46/0xa0 [ 48.478403] free_iova_fast+0x277/0x340 [ 48.478443] fq_ring_free+0x15a/0x1a0 [ 48.478473] queue_iova+0x19c/0x1f0 [ 48.478597] cleanup_page_dma.isra.64+0x62/0xb0 [i915] [ 48.478712] __gen8_ppgtt_cleanup+0x63/0x80 [i915] [ 48.478826] __gen8_ppgtt_cleanup+0x42/0x80 [i915] [ 48.478940] __gen8_ppgtt_clear+0x433/0x4b0 [i915] [ 48.479053] __gen8_ppgtt_clear+0x462/0x4b0 [i915] [ 48.479081] ? __sg_free_table+0x9e/0xf0 [ 48.479116] ? kfree+0x7f/0x150 [ 48.479234] i915_vma_unbind+0x1e2/0x240 [i915] [ 48.479352] i915_vma_destroy+0x3a/0x280 [i915] [ 48.479465] __i915_gem_free_objects+0xf0/0x2d0 [i915] [ 48.479579] __i915_gem_free_work+0x41/0xa0 [i915] [ 48.479607] process_one_work+0x495/0x710 [ 48.479642] worker_thread+0x4c7/0x6f0 [ 48.479687] ? process_one_work+0x710/0x710 [ 48.479724] kthread+0x1b2/0x1d0 [ 48.479774] ? kthread_create_worker_on_cpu+0xa0/0xa0 [ 48.479820] ret_from_fork+0x1f/0x30 [ 48.479864] [ 48.479907] Allocated by task 631: [ 48.479944] save_stack+0x19/0x80 [ 48.479994] __kasan_kmalloc.constprop.6+0xc1/0xd0 [ 48.480038] kmem_cache_alloc+0x91/0xf0 [ 48.480082] alloc_iova+0x2b/0x1e0 [ 48.480125] alloc_iova_fast+0x58/0x376 [ 48.480166] intel_alloc_iova+0x90/0xc0 [ 48.480214] intel_map_sg+0xde/0x1f0 [ 48.480343] i915_gem_gtt_prepare_pages+0xb8/0x170 [i915] [ 48.480465] huge_get_pages+0x232/0x2b0 [i915] [ 48.480590] ____i915_gem_object_get_pages+0x40/0xb0 [i915] [ 48.480712] __i915_gem_object_get_pages+0x90/0xa0 [i915] [ 48.480834] i915_gem_object_prepare_write+0x2d6/0x330 [i915] [ 48.480955] create_test_object.isra.54+0x1a9/0x3e0 [i915] [ 48.481075] igt_shared_ctx_exec+0x365/0x3c0 [i915] [ 48.481210] __i915_subtests.cold.4+0x30/0x92 [i915] [ 48.481341] __run_selftests.cold.3+0xa9/0x119 [i915] [ 48.481466] i915_live_selftests+0x3c/0x70 [i915] [ 48.481583] i915_pci_probe+0xe7/0x220 [i915] [ 48.481620] pci_device_probe+0xe0/0x180 [ 48.481665] really_probe+0x163/0x4e0 [ 48.481710] device_driver_attach+0x85/0x90 [ 48.481750] __driver_attach+0xa5/0x180 [ 48.481796] bus_for_each_dev+0xda/0x130 [ 48.481831] bus_add_driver+0x205/0x2e0 [ 48.481882] driver_register+0xca/0x140 [ 48.481927] do_one_initcall+0x6c/0x1af [ 48.481970] do_init_module+0x106/0x350 [ 48.482010] load_module+0x3d2c/0x3ea0 [ 48.482058] __do_sys_finit_module+0x110/0x180 [ 48.482102] do_syscall_64+0x62/0x1f0 [ 48.482147] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 48.482190] [ 48.482224] Freed by task 37: [ 48.482273] save_stack+0x19/0x80 [ 48.482318] __kasan_slab_free+0x12e/0x180 [ 48.482363] kmem_cache_free+0x70/0x140 [ 48.482406] __free_iova+0x1d/0x30 [ 48.482445] fq_ring_free+0x15a/0x1a0 [ 48.482490] queue_iova+0x19c/0x1f0 [ 48.482624] cleanup_page_dma.isra.64+0x62/0xb0 [i915] [ 48.482749] __gen8_ppgtt_cleanup+0x63/0x80 [i915] [ 48.482873] __gen8_ppgtt_cleanup+0x42/0x80 [i915] [ 48.482999] __gen8_ppgtt_clear+0x433/0x4b0 [i915] [ 48.483123] __gen8_ppgtt_clear+0x462/0x4b0 [i915] [ 48.483250] i915_vma_unbind+0x1e2/0x240 [i915] [ 48.483378] i915_vma_destroy+0x3a/0x280 [i915] [ 48.483500] __i915_gem_free_objects+0xf0/0x2d0 [i915] [ 48.483622] __i915_gem_free_work+0x41/0xa0 [i915] [ 48.483659] process_one_work+0x495/0x710 [ 48.483704] worker_thread+0x4c7/0x6f0 [ 48.483748] kthread+0x1b2/0x1d0 [ 48.483787] ret_from_fork+0x1f/0x30 [ 48.483831] [ 48.483868] The buggy address belongs to the object at ffff88870fc19000 [ 48.483868] which belongs to the cache iommu_iova of size 40 [ 48.483920] The buggy address is located 32 bytes inside of [ 48.483920] 40-byte region [ffff88870fc19000, ffff88870fc19028) [ 48.483964] The buggy address belongs to the page: [ 48.484006] page:ffffea001c3f0600 refcount:1 mapcount:0 mapping:ffff8888181a91c0 index:0x0 compound_mapcount: 0 [ 48.484045] flags: 0x8000000000010200(slab|head) [ 48.484096] raw: 8000000000010200 ffffea001c421a08 ffffea001c447e88 ffff8888181a91c0 [ 48.484141] raw: 0000000000000000 0000000000120012 00000001ffffffff 0000000000000000 [ 48.484188] page dumped because: kasan: bad access detected [ 48.484230] [ 48.484265] Memory state around the buggy address: [ 48.484314] ffff88870fc18f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 48.484361] ffff88870fc18f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 48.484406] >ffff88870fc19000: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc [ 48.484451] ^ [ 48.484494] ffff88870fc19080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 48.484530] ffff88870fc19100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108602 Fixes: e60aa7b53845 ("iommu/iova: Extend rbtree node caching") Signed-off-by: Chris Wilson Cc: Robin Murphy Cc: Joerg Roedel Cc: Joerg Roedel Cc: # v4.15+ Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 8413ae54904a..3e1a8a675572 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -137,8 +137,9 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) struct iova *cached_iova; cached_iova = rb_entry(iovad->cached32_node, struct iova, node); - if (free->pfn_hi < iovad->dma_32bit_pfn && - free->pfn_lo >= cached_iova->pfn_lo) { + if (free == cached_iova || + (free->pfn_hi < iovad->dma_32bit_pfn && + free->pfn_lo >= cached_iova->pfn_lo)) { iovad->cached32_node = rb_next(&free->node); iovad->max32_alloc_size = iovad->dma_32bit_pfn; } From 7f6cade5b6bf47daa315118a05aab613c73462ff Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Sun, 21 Jul 2019 17:22:07 -0700 Subject: [PATCH 6/8] iommu/vt-d: Print pasid table entries MSB to LSB in debugfs Commit dd5142ca5d24 ("iommu/vt-d: Add debugfs support to show scalable mode DMAR table internals") prints content of pasid table entries from LSB to MSB where as other entries are printed MSB to LSB. So, to maintain uniformity among all entries and to not confuse the user, print MSB first. Cc: Joerg Roedel Cc: Lu Baolu Cc: Sohil Mehta Cc: Jacob Pan Signed-off-by: Sai Praneeth Prakhya Fixes: dd5142ca5d24 ("iommu/vt-d: Add debugfs support to show scalable mode DMAR table internals") Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu-debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c index 73a552914455..2b25d9c59336 100644 --- a/drivers/iommu/intel-iommu-debugfs.c +++ b/drivers/iommu/intel-iommu-debugfs.c @@ -162,9 +162,9 @@ static inline void print_tbl_walk(struct seq_file *m) (u64)0, (u64)0, (u64)0); else seq_printf(m, "%-6d\t0x%016llx:0x%016llx:0x%016llx\n", - tbl_wlk->pasid, tbl_wlk->pasid_tbl_entry->val[0], + tbl_wlk->pasid, tbl_wlk->pasid_tbl_entry->val[2], tbl_wlk->pasid_tbl_entry->val[1], - tbl_wlk->pasid_tbl_entry->val[2]); + tbl_wlk->pasid_tbl_entry->val[0]); } static void pasid_tbl_walk(struct seq_file *m, struct pasid_entry *tbl_entry, From 201c1db90cd643282185a00770f12f95da330eca Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 23 Jul 2019 09:51:00 +0200 Subject: [PATCH 7/8] iommu/iova: Fix compilation error with !CONFIG_IOMMU_IOVA The stub function for !CONFIG_IOMMU_IOVA needs to be 'static inline'. Fixes: effa467870c76 ('iommu/vt-d: Don't queue_iova() if there is no flush queue') Signed-off-by: Joerg Roedel --- include/linux/iova.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/iova.h b/include/linux/iova.h index cd0f1de901a8..a0637abffee8 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -236,7 +236,7 @@ static inline void init_iova_domain(struct iova_domain *iovad, { } -bool has_iova_flush_queue(struct iova_domain *iovad) +static inline bool has_iova_flush_queue(struct iova_domain *iovad) { return false; } From 66929812955bbec808c94d7a3916f41638a98a0a Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 16 Jul 2019 04:29:16 +0000 Subject: [PATCH 8/8] iommu/amd: Add support for X2APIC IOMMU interrupts AMD IOMMU requires IntCapXT registers to be setup in order to generate its own interrupts (for Event Log, PPR Log, and GA Log) with 32-bit APIC destination ID. Without this support, AMD IOMMU MSI interrupts will not be routed correctly when booting the system in X2APIC mode. Cc: Joerg Roedel Fixes: 90fcffd9cf5e ('iommu/amd: Add support for IOMMU XT mode') Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 90 +++++++++++++++++++++++++++++++++ drivers/iommu/amd_iommu_types.h | 9 ++++ 2 files changed, 99 insertions(+) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index eb104c719629..4413aa67000e 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include #include @@ -1920,6 +1922,90 @@ static int iommu_setup_msi(struct amd_iommu *iommu) return 0; } +#define XT_INT_DEST_MODE(x) (((x) & 0x1ULL) << 2) +#define XT_INT_DEST_LO(x) (((x) & 0xFFFFFFULL) << 8) +#define XT_INT_VEC(x) (((x) & 0xFFULL) << 32) +#define XT_INT_DEST_HI(x) ((((x) >> 24) & 0xFFULL) << 56) + +/** + * Setup the IntCapXT registers with interrupt routing information + * based on the PCI MSI capability block registers, accessed via + * MMIO MSI address low/hi and MSI data registers. + */ +static void iommu_update_intcapxt(struct amd_iommu *iommu) +{ + u64 val; + u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET); + u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET); + u32 data = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET); + bool dm = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; + u32 dest = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF); + + if (x2apic_enabled()) + dest |= MSI_ADDR_EXT_DEST_ID(addr_hi); + + val = XT_INT_VEC(data & 0xFF) | + XT_INT_DEST_MODE(dm) | + XT_INT_DEST_LO(dest) | + XT_INT_DEST_HI(dest); + + /** + * Current IOMMU implemtation uses the same IRQ for all + * 3 IOMMU interrupts. + */ + writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); + writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); + writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); +} + +static void _irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) { + if (iommu->dev->irq == notify->irq) { + iommu_update_intcapxt(iommu); + break; + } + } +} + +static void _irq_notifier_release(struct kref *ref) +{ +} + +static int iommu_init_intcapxt(struct amd_iommu *iommu) +{ + int ret; + struct irq_affinity_notify *notify = &iommu->intcapxt_notify; + + /** + * IntCapXT requires XTSup=1, which can be inferred + * amd_iommu_xt_mode. + */ + if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE) + return 0; + + /** + * Also, we need to setup notifier to update the IntCapXT registers + * whenever the irq affinity is changed from user-space. + */ + notify->irq = iommu->dev->irq; + notify->notify = _irq_notifier_notify, + notify->release = _irq_notifier_release, + ret = irq_set_affinity_notifier(iommu->dev->irq, notify); + if (ret) { + pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n", + iommu->devid, iommu->dev->irq); + return ret; + } + + iommu_update_intcapxt(iommu); + iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); + return ret; +} + static int iommu_init_msi(struct amd_iommu *iommu) { int ret; @@ -1936,6 +2022,10 @@ static int iommu_init_msi(struct amd_iommu *iommu) return ret; enable_faults: + ret = iommu_init_intcapxt(iommu); + if (ret) + return ret; + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); if (iommu->ppr_log != NULL) diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 52c35d557fad..64edd5a9694c 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -60,6 +60,12 @@ #define MMIO_PPR_LOG_OFFSET 0x0038 #define MMIO_GA_LOG_BASE_OFFSET 0x00e0 #define MMIO_GA_LOG_TAIL_OFFSET 0x00e8 +#define MMIO_MSI_ADDR_LO_OFFSET 0x015C +#define MMIO_MSI_ADDR_HI_OFFSET 0x0160 +#define MMIO_MSI_DATA_OFFSET 0x0164 +#define MMIO_INTCAPXT_EVT_OFFSET 0x0170 +#define MMIO_INTCAPXT_PPR_OFFSET 0x0178 +#define MMIO_INTCAPXT_GALOG_OFFSET 0x0180 #define MMIO_CMD_HEAD_OFFSET 0x2000 #define MMIO_CMD_TAIL_OFFSET 0x2008 #define MMIO_EVT_HEAD_OFFSET 0x2010 @@ -150,6 +156,7 @@ #define CONTROL_GALOG_EN 0x1CULL #define CONTROL_GAINT_EN 0x1DULL #define CONTROL_XT_EN 0x32ULL +#define CONTROL_INTCAPXT_EN 0x33ULL #define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) #define CTRL_INV_TO_NONE 0 @@ -592,6 +599,8 @@ struct amd_iommu { /* DebugFS Info */ struct dentry *debugfs; #endif + /* IRQ notifier for IntCapXT interrupt */ + struct irq_affinity_notify intcapxt_notify; }; static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)