diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index ec562078247f..a64b6f3b9a66 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -33,6 +33,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/mempool.h>
 #include <linux/memory.h>
+#include <linux/cpu.h>
 #include <linux/timer.h>
 #include <linux/io.h>
 #include <linux/iova.h>
@@ -456,8 +457,6 @@ static LIST_HEAD(dmar_rmrr_units);
 
 static void flush_unmaps_timeout(unsigned long data);
 
-static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
-
 struct deferred_flush_entry {
 	struct iova *iova;
 	struct dmar_domain *domain;
@@ -470,17 +469,19 @@ struct deferred_flush_table {
 	struct deferred_flush_entry entries[HIGH_WATER_MARK];
 };
 
-static struct deferred_flush_table *deferred_flush;
+struct deferred_flush_data {
+	spinlock_t lock;
+	int timer_on;
+	struct timer_list timer;
+	long size;
+	struct deferred_flush_table *tables;
+};
+
+DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
 
 /* bitmap for indexing intel_iommus */
 static int g_num_of_iommus;
 
-static DEFINE_SPINLOCK(async_umap_flush_lock);
-static LIST_HEAD(unmaps_to_do);
-
-static int timer_on;
-static long list_size;
-
 static void domain_exit(struct dmar_domain *domain);
 static void domain_remove_dev_info(struct dmar_domain *domain);
 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
@@ -1922,8 +1923,12 @@ static void domain_exit(struct dmar_domain *domain)
 		return;
 
 	/* Flush any lazy unmaps that may reference this domain */
-	if (!intel_iommu_strict)
-		flush_unmaps_timeout(0);
+	if (!intel_iommu_strict) {
+		int cpu;
+
+		for_each_possible_cpu(cpu)
+			flush_unmaps_timeout(cpu);
+	}
 
 	/* Remove associated devices and clear attached or cached domains */
 	rcu_read_lock();
@@ -3081,7 +3086,7 @@ static int __init init_dmars(void)
 	bool copied_tables = false;
 	struct device *dev;
 	struct intel_iommu *iommu;
-	int i, ret;
+	int i, ret, cpu;
 
 	/*
 	 * for each drhd
@@ -3114,11 +3119,20 @@ static int __init init_dmars(void)
 		goto error;
 	}
 
-	deferred_flush = kzalloc(g_num_of_iommus *
-		sizeof(struct deferred_flush_table), GFP_KERNEL);
-	if (!deferred_flush) {
-		ret = -ENOMEM;
-		goto free_g_iommus;
+	for_each_possible_cpu(cpu) {
+		struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush,
+							      cpu);
+
+		dfd->tables = kzalloc(g_num_of_iommus *
+				      sizeof(struct deferred_flush_table),
+				      GFP_KERNEL);
+		if (!dfd->tables) {
+			ret = -ENOMEM;
+			goto free_g_iommus;
+		}
+
+		spin_lock_init(&dfd->lock);
+		setup_timer(&dfd->timer, flush_unmaps_timeout, cpu);
 	}
 
 	for_each_active_iommu(iommu, drhd) {
@@ -3295,8 +3309,9 @@ free_iommu:
 		disable_dmar_iommu(iommu);
 		free_dmar_iommu(iommu);
 	}
-	kfree(deferred_flush);
 free_g_iommus:
+	for_each_possible_cpu(cpu)
+		kfree(per_cpu_ptr(&deferred_flush, cpu)->tables);
 	kfree(g_iommus);
 error:
 	return ret;
@@ -3501,29 +3516,31 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,
 				  dir, *dev->dma_mask);
 }
 
-static void flush_unmaps(void)
+static void flush_unmaps(struct deferred_flush_data *flush_data)
 {
 	int i, j;
 
-	timer_on = 0;
+	flush_data->timer_on = 0;
 
 	/* just flush them all */
 	for (i = 0; i < g_num_of_iommus; i++) {
 		struct intel_iommu *iommu = g_iommus[i];
+		struct deferred_flush_table *flush_table =
+				&flush_data->tables[i];
 		if (!iommu)
 			continue;
 
-		if (!deferred_flush[i].next)
+		if (!flush_table->next)
 			continue;
 
 		/* In caching mode, global flushes turn emulation expensive */
 		if (!cap_caching_mode(iommu->cap))
 			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH);
-		for (j = 0; j < deferred_flush[i].next; j++) {
+		for (j = 0; j < flush_table->next; j++) {
 			unsigned long mask;
 			struct deferred_flush_entry *entry =
-						&deferred_flush->entries[j];
+						&flush_table->entries[j];
 			struct iova *iova = entry->iova;
 			struct dmar_domain *domain = entry->domain;
 			struct page *freelist = entry->freelist;
@@ -3542,19 +3559,20 @@ static void flush_unmaps(void)
 			if (freelist)
 				dma_free_pagelist(freelist);
 		}
-		deferred_flush[i].next = 0;
+		flush_table->next = 0;
 	}
 
-	list_size = 0;
+	flush_data->size = 0;
 }
 
-static void flush_unmaps_timeout(unsigned long data)
+static void flush_unmaps_timeout(unsigned long cpuid)
 {
+	struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid);
 	unsigned long flags;
 
-	spin_lock_irqsave(&async_umap_flush_lock, flags);
-	flush_unmaps();
-	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
+	spin_lock_irqsave(&flush_data->lock, flags);
+	flush_unmaps(flush_data);
+	spin_unlock_irqrestore(&flush_data->lock, flags);
 }
 
 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
@@ -3563,28 +3581,44 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
 	int entry_id, iommu_id;
 	struct intel_iommu *iommu;
 	struct deferred_flush_entry *entry;
+	struct deferred_flush_data *flush_data;
+	unsigned int cpuid;
 
-	spin_lock_irqsave(&async_umap_flush_lock, flags);
-	if (list_size == HIGH_WATER_MARK)
-		flush_unmaps();
+	cpuid = get_cpu();
+	flush_data = per_cpu_ptr(&deferred_flush, cpuid);
+
+	/* Flush all CPUs' entries to avoid deferring too much. If
+	 * this becomes a bottleneck, can just flush us, and rely on
+	 * flush timer for the rest.
+	 */
+	if (flush_data->size == HIGH_WATER_MARK) {
+		int cpu;
+
+		for_each_online_cpu(cpu)
+			flush_unmaps_timeout(cpu);
+	}
+
+	spin_lock_irqsave(&flush_data->lock, flags);
 
 	iommu = domain_get_iommu(dom);
 	iommu_id = iommu->seq_id;
 
-	entry_id = deferred_flush[iommu_id].next;
-	++(deferred_flush[iommu_id].next);
+	entry_id = flush_data->tables[iommu_id].next;
+	++(flush_data->tables[iommu_id].next);
 
-	entry = &deferred_flush[iommu_id].entries[entry_id];
+	entry = &flush_data->tables[iommu_id].entries[entry_id];
 	entry->domain = dom;
 	entry->iova = iova;
 	entry->freelist = freelist;
 
-	if (!timer_on) {
-		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
-		timer_on = 1;
+	if (!flush_data->timer_on) {
+		mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10));
+		flush_data->timer_on = 1;
 	}
-	list_size++;
-	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
+	flush_data->size++;
+	spin_unlock_irqrestore(&flush_data->lock, flags);
+
+	put_cpu();
 }
 
 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
@@ -4508,6 +4542,23 @@ static struct notifier_block intel_iommu_memory_nb = {
 	.priority = 0
 };
 
+static int intel_iommu_cpu_notifier(struct notifier_block *nfb,
+				    unsigned long action, void *v)
+{
+	unsigned int cpu = (unsigned long)v;
+
+	switch (action) {
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		flush_unmaps_timeout(cpu);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block intel_iommu_cpu_nb = {
+	.notifier_call = intel_iommu_cpu_notifier,
+};
 
 static ssize_t intel_iommu_show_version(struct device *dev,
 					struct device_attribute *attr,
@@ -4641,7 +4692,6 @@ int __init intel_iommu_init(void)
 	up_write(&dmar_global_lock);
 	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
 
-	init_timer(&unmap_timer);
 #ifdef CONFIG_SWIOTLB
 	swiotlb = 0;
 #endif
@@ -4658,6 +4708,7 @@ int __init intel_iommu_init(void)
 	bus_register_notifier(&pci_bus_type, &device_nb);
 	if (si_domain && !hw_pass_through)
 		register_memory_notifier(&intel_iommu_memory_nb);
+	register_hotcpu_notifier(&intel_iommu_cpu_nb);
 
 	intel_iommu_enabled = 1;