From 2c2e2c389d03bb16b8cdf9db3ac615385fac100f Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 19 Jun 2009 13:47:29 -0700 Subject: [PATCH 1/3] IOMMU Identity Mapping Support (drivers/pci/intel_iommu.c) Identity mapping for IOMMU defines a single domain to 1:1 map all PCI devices to all usable memory. This reduces map/unmap overhead in DMA API's and improve IOMMU performance. On 10Gb network cards, Netperf shows no performance degradation compared to non-IOMMU performance. This method may lose some of DMA remapping benefits like isolation. The patch sets up identity mapping for all PCI devices to all usable memory. In the DMA API, there is no overhead to maintain page tables, invalidate iotlb, flush cache etc. 32 bit DMA devices don't use identity mapping domain, in order to access memory beyond 4GiB. When kernel option iommu=pt, pass through is first tried. If pass through succeeds, IOMMU goes to pass through. If pass through is not supported in hw or fail for whatever reason, IOMMU goes to identity mapping. Signed-off-by: Fenghua Yu Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 332 ++++++++++++++++++++++++++++++-------- 1 file changed, 263 insertions(+), 69 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 178853a07440..e53eacd75c8d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "pci.h" #define ROOT_SIZE VTD_PAGE_SIZE @@ -217,6 +218,14 @@ static inline bool dma_pte_present(struct dma_pte *pte) return (pte->val & 3) != 0; } +/* + * This domain is a statically identity mapping domain. + * 1. This domain creats a static 1:1 mapping to all usable memory. + * 2. It maps to each iommu if successful. + * 3. Each iommu mapps to this domain if successful. + */ +struct dmar_domain *si_domain; + /* devices under the same p2p bridge are owned in one domain */ #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0) @@ -225,6 +234,9 @@ static inline bool dma_pte_present(struct dma_pte *pte) */ #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1) +/* si_domain contains mulitple devices */ +#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2) + struct dmar_domain { int id; /* domain id */ unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/ @@ -435,12 +447,14 @@ int iommu_calculate_agaw(struct intel_iommu *iommu) return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); } -/* in native case, each domain is related to only one iommu */ +/* This functionin only returns single iommu in a domain */ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) { int iommu_id; + /* si_domain and vm domain should not get here. */ BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE); + BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY); iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); if (iommu_id < 0 || iommu_id >= g_num_of_iommus) @@ -1189,48 +1203,71 @@ void free_dmar_iommu(struct intel_iommu *iommu) free_context_table(iommu); } -static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) +static struct dmar_domain *alloc_domain(void) { - unsigned long num; - unsigned long ndomains; struct dmar_domain *domain; - unsigned long flags; domain = alloc_domain_mem(); if (!domain) return NULL; - ndomains = cap_ndoms(iommu->cap); - - spin_lock_irqsave(&iommu->lock, flags); - num = find_first_zero_bit(iommu->domain_ids, ndomains); - if (num >= ndomains) { - spin_unlock_irqrestore(&iommu->lock, flags); - free_domain_mem(domain); - printk(KERN_ERR "IOMMU: no free domain ids\n"); - return NULL; - } - - set_bit(num, iommu->domain_ids); - domain->id = num; memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); - set_bit(iommu->seq_id, &domain->iommu_bmp); domain->flags = 0; - iommu->domains[num] = domain; - spin_unlock_irqrestore(&iommu->lock, flags); return domain; } -static void iommu_free_domain(struct dmar_domain *domain) +static int iommu_attach_domain(struct dmar_domain *domain, + struct intel_iommu *iommu) { + int num; + unsigned long ndomains; unsigned long flags; - struct intel_iommu *iommu; - iommu = domain_get_iommu(domain); + ndomains = cap_ndoms(iommu->cap); spin_lock_irqsave(&iommu->lock, flags); - clear_bit(domain->id, iommu->domain_ids); + + num = find_first_zero_bit(iommu->domain_ids, ndomains); + if (num >= ndomains) { + spin_unlock_irqrestore(&iommu->lock, flags); + printk(KERN_ERR "IOMMU: no free domain ids\n"); + return -ENOMEM; + } + + domain->id = num; + set_bit(num, iommu->domain_ids); + set_bit(iommu->seq_id, &domain->iommu_bmp); + iommu->domains[num] = domain; + spin_unlock_irqrestore(&iommu->lock, flags); + + return 0; +} + +static void iommu_detach_domain(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + unsigned long flags; + int num, ndomains; + int found = 0; + + spin_lock_irqsave(&iommu->lock, flags); + ndomains = cap_ndoms(iommu->cap); + num = find_first_bit(iommu->domain_ids, ndomains); + for (; num < ndomains; ) { + if (iommu->domains[num] == domain) { + found = 1; + break; + } + num = find_next_bit(iommu->domain_ids, + cap_ndoms(iommu->cap), num+1); + } + + if (found) { + clear_bit(num, iommu->domain_ids); + clear_bit(iommu->seq_id, &domain->iommu_bmp); + iommu->domains[num] = NULL; + } spin_unlock_irqrestore(&iommu->lock, flags); } @@ -1350,6 +1387,8 @@ static int domain_init(struct dmar_domain *domain, int guest_width) static void domain_exit(struct dmar_domain *domain) { + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; u64 end; /* Domain 0 is reserved, so dont process it */ @@ -1368,7 +1407,10 @@ static void domain_exit(struct dmar_domain *domain) /* free page tables */ dma_pte_free_pagetable(domain, 0, end); - iommu_free_domain(domain); + for_each_active_iommu(iommu, drhd) + if (test_bit(iommu->seq_id, &domain->iommu_bmp)) + iommu_detach_domain(domain, iommu); + free_domain_mem(domain); } @@ -1408,7 +1450,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, id = domain->id; pgd = domain->pgd; - if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || + domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) { int found = 0; /* find an available domain id for this device in iommu */ @@ -1433,6 +1476,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, } set_bit(num, iommu->domain_ids); + set_bit(iommu->seq_id, &domain->iommu_bmp); iommu->domains[num] = domain; id = num; } @@ -1675,6 +1719,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) unsigned long flags; int bus = 0, devfn = 0; int segment; + int ret; domain = find_domain(pdev); if (domain) @@ -1707,6 +1752,10 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) } } + domain = alloc_domain(); + if (!domain) + goto error; + /* Allocate new domain for the device */ drhd = dmar_find_matched_drhd_unit(pdev); if (!drhd) { @@ -1716,9 +1765,11 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) } iommu = drhd->iommu; - domain = iommu_alloc_domain(iommu); - if (!domain) + ret = iommu_attach_domain(domain, iommu); + if (ret) { + domain_exit(domain); goto error; + } if (domain_init(domain, gaw)) { domain_exit(domain); @@ -1792,6 +1843,8 @@ error: return find_domain(pdev); } +static int iommu_identity_mapping; + static int iommu_prepare_identity_map(struct pci_dev *pdev, unsigned long long start, unsigned long long end) @@ -1804,8 +1857,11 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, printk(KERN_INFO "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", pci_name(pdev), start, end); - /* page table init */ - domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + if (iommu_identity_mapping) + domain = si_domain; + else + /* page table init */ + domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) return -ENOMEM; @@ -1952,7 +2008,110 @@ static int __init init_context_pass_through(void) return 0; } -static int __init init_dmars(void) +static int md_domain_init(struct dmar_domain *domain, int guest_width); +static int si_domain_init(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + int ret = 0; + + si_domain = alloc_domain(); + if (!si_domain) + return -EFAULT; + + + for_each_active_iommu(iommu, drhd) { + ret = iommu_attach_domain(si_domain, iommu); + if (ret) { + domain_exit(si_domain); + return -EFAULT; + } + } + + if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + domain_exit(si_domain); + return -EFAULT; + } + + si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; + + return 0; +} + +static void domain_remove_one_dev_info(struct dmar_domain *domain, + struct pci_dev *pdev); +static int identity_mapping(struct pci_dev *pdev) +{ + struct device_domain_info *info; + + if (likely(!iommu_identity_mapping)) + return 0; + + + list_for_each_entry(info, &si_domain->devices, link) + if (info->dev == pdev) + return 1; + return 0; +} + +static int domain_add_dev_info(struct dmar_domain *domain, + struct pci_dev *pdev) +{ + struct device_domain_info *info; + unsigned long flags; + + info = alloc_devinfo_mem(); + if (!info) + return -ENOMEM; + + info->segment = pci_domain_nr(pdev->bus); + info->bus = pdev->bus->number; + info->devfn = pdev->devfn; + info->dev = pdev; + info->domain = domain; + + spin_lock_irqsave(&device_domain_lock, flags); + list_add(&info->link, &domain->devices); + list_add(&info->global, &device_domain_list); + pdev->dev.archdata.iommu = info; + spin_unlock_irqrestore(&device_domain_lock, flags); + + return 0; +} + +static int iommu_prepare_static_identity_mapping(void) +{ + int i; + struct pci_dev *pdev = NULL; + int ret; + + ret = si_domain_init(); + if (ret) + return -EFAULT; + + printk(KERN_INFO "IOMMU: Setting identity map:\n"); + for_each_pci_dev(pdev) { + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (ei->type == E820_RAM) { + ret = iommu_prepare_identity_map(pdev, + ei->addr, ei->addr + ei->size); + if (ret) { + printk(KERN_INFO "1:1 mapping to one domain failed.\n"); + return -EFAULT; + } + } + } + ret = domain_add_dev_info(si_domain, pdev); + if (ret) + return ret; + } + + return 0; +} + +int __init init_dmars(void) { struct dmar_drhd_unit *drhd; struct dmar_rmrr_unit *rmrr; @@ -1961,6 +2120,13 @@ static int __init init_dmars(void) int i, ret; int pass_through = 1; + /* + * In case pass through can not be enabled, iommu tries to use identity + * mapping. + */ + if (iommu_pass_through) + iommu_identity_mapping = 1; + /* * for each drhd * allocate root @@ -2090,9 +2256,12 @@ static int __init init_dmars(void) /* * If pass through is not set or not enabled, setup context entries for - * identity mappings for rmrr, gfx, and isa. + * identity mappings for rmrr, gfx, and isa and may fall back to static + * identity mapping if iommu_identity_mapping is set. */ if (!iommu_pass_through) { + if (iommu_identity_mapping) + iommu_prepare_static_identity_mapping(); /* * For each rmrr * for each dev attached to rmrr @@ -2107,6 +2276,7 @@ static int __init init_dmars(void) * endfor * endfor */ + printk(KERN_INFO "IOMMU: Setting RMRR:\n"); for_each_rmrr_units(rmrr) { for (i = 0; i < rmrr->devices_cnt; i++) { pdev = rmrr->devices[i]; @@ -2248,6 +2418,52 @@ get_valid_domain_for_dev(struct pci_dev *pdev) return domain; } +static int iommu_dummy(struct pci_dev *pdev) +{ + return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; +} + +/* Check if the pdev needs to go through non-identity map and unmap process.*/ +static int iommu_no_mapping(struct pci_dev *pdev) +{ + int found; + + if (!iommu_identity_mapping) + return iommu_dummy(pdev); + + found = identity_mapping(pdev); + if (found) { + if (pdev->dma_mask > DMA_BIT_MASK(32)) + return 1; + else { + /* + * 32 bit DMA is removed from si_domain and fall back + * to non-identity mapping. + */ + domain_remove_one_dev_info(si_domain, pdev); + printk(KERN_INFO "32bit %s uses non-identity mapping\n", + pci_name(pdev)); + return 0; + } + } else { + /* + * In case of a detached 64 bit DMA device from vm, the device + * is put into si_domain for identity mapping. + */ + if (pdev->dma_mask > DMA_BIT_MASK(32)) { + int ret; + ret = domain_add_dev_info(si_domain, pdev); + if (!ret) { + printk(KERN_INFO "64bit %s uses identity mapping\n", + pci_name(pdev)); + return 1; + } + } + } + + return iommu_dummy(pdev); +} + static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir, u64 dma_mask) { @@ -2260,7 +2476,8 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); - if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) + + if (iommu_no_mapping(pdev)) return paddr; domain = get_valid_domain_for_dev(pdev); @@ -2401,8 +2618,9 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, struct iova *iova; struct intel_iommu *iommu; - if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) + if (iommu_no_mapping(pdev)) return; + domain = find_domain(pdev); BUG_ON(!domain); @@ -2492,7 +2710,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, struct scatterlist *sg; struct intel_iommu *iommu; - if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) + if (iommu_no_mapping(pdev)) return; domain = find_domain(pdev); @@ -2553,7 +2771,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); - if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) + if (iommu_no_mapping(pdev)) return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); domain = get_valid_domain_for_dev(pdev); @@ -2951,31 +3169,6 @@ int __init intel_iommu_init(void) return 0; } -static int vm_domain_add_dev_info(struct dmar_domain *domain, - struct pci_dev *pdev) -{ - struct device_domain_info *info; - unsigned long flags; - - info = alloc_devinfo_mem(); - if (!info) - return -ENOMEM; - - info->segment = pci_domain_nr(pdev->bus); - info->bus = pdev->bus->number; - info->devfn = pdev->devfn; - info->dev = pdev; - info->domain = domain; - - spin_lock_irqsave(&device_domain_lock, flags); - list_add(&info->link, &domain->devices); - list_add(&info->global, &device_domain_list); - pdev->dev.archdata.iommu = info; - spin_unlock_irqrestore(&device_domain_lock, flags); - - return 0; -} - static void iommu_detach_dependent_devices(struct intel_iommu *iommu, struct pci_dev *pdev) { @@ -3003,7 +3196,7 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu, } } -static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, +static void domain_remove_one_dev_info(struct dmar_domain *domain, struct pci_dev *pdev) { struct device_domain_info *info; @@ -3136,7 +3329,7 @@ static struct dmar_domain *iommu_alloc_vm_domain(void) return domain; } -static int vm_domain_init(struct dmar_domain *domain, int guest_width) +static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; @@ -3227,7 +3420,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain) "intel_iommu_domain_init: dmar_domain == NULL\n"); return -ENOMEM; } - if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { printk(KERN_ERR "intel_iommu_domain_init() failed\n"); vm_domain_exit(dmar_domain); @@ -3262,8 +3455,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, old_domain = find_domain(pdev); if (old_domain) { - if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) - vm_domain_remove_one_dev_info(old_domain, pdev); + if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || + dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) + domain_remove_one_dev_info(old_domain, pdev); else domain_remove_dev_info(old_domain); } @@ -3285,7 +3479,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EFAULT; } - ret = vm_domain_add_dev_info(dmar_domain, pdev); + ret = domain_add_dev_info(dmar_domain, pdev); if (ret) return ret; @@ -3299,7 +3493,7 @@ static void intel_iommu_detach_device(struct iommu_domain *domain, struct dmar_domain *dmar_domain = domain->priv; struct pci_dev *pdev = to_pci_dev(dev); - vm_domain_remove_one_dev_info(dmar_domain, pdev); + domain_remove_one_dev_info(dmar_domain, pdev); } static int intel_iommu_map_range(struct iommu_domain *domain, From c4658b4e777bebf69884f4884a9bfb2f84dd71d9 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Sat, 23 May 2009 00:41:14 +0800 Subject: [PATCH 2/3] Intel-IOMMU, intr-remap: set the whole 128bits of irte when modify/free it Interrupt remapping table entry is 128bits. Currently, it only sets low 64bits of irte in modify_irte and free_irte. This ignores high 64bits setting of irte, that means source-id setting will be ignored. This patch sets the whole 128bits of irte when modify/free it. Following source-id checking patch depends on this. Signed-off-by: Weidong Han Signed-off-by: David Woodhouse --- drivers/pci/intr_remapping.c | 40 +++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 1e83c8c5f985..44025a0c2bb6 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -314,7 +314,8 @@ int modify_irte(int irq, struct irte *irte_modified) index = irq_iommu->irte_index + irq_iommu->sub_handle; irte = &iommu->ir_table->base[index]; - set_64bit((unsigned long *)irte, irte_modified->low); + set_64bit((unsigned long *)&irte->low, irte_modified->low); + set_64bit((unsigned long *)&irte->high, irte_modified->high); __iommu_flush_cache(iommu, irte, sizeof(*irte)); rc = qi_flush_iec(iommu, index, 0); @@ -369,12 +370,32 @@ struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) return drhd->iommu; } +static int clear_entries(struct irq_2_iommu *irq_iommu) +{ + struct irte *start, *entry, *end; + struct intel_iommu *iommu; + int index; + + if (irq_iommu->sub_handle) + return 0; + + iommu = irq_iommu->iommu; + index = irq_iommu->irte_index + irq_iommu->sub_handle; + + start = iommu->ir_table->base + index; + end = start + (1 << irq_iommu->irte_mask); + + for (entry = start; entry < end; entry++) { + set_64bit((unsigned long *)&entry->low, 0); + set_64bit((unsigned long *)&entry->high, 0); + } + + return qi_flush_iec(iommu, index, irq_iommu->irte_mask); +} + int free_irte(int irq) { int rc = 0; - int index, i; - struct irte *irte; - struct intel_iommu *iommu; struct irq_2_iommu *irq_iommu; unsigned long flags; @@ -385,16 +406,7 @@ int free_irte(int irq) return -1; } - iommu = irq_iommu->iommu; - - index = irq_iommu->irte_index + irq_iommu->sub_handle; - irte = &iommu->ir_table->base[index]; - - if (!irq_iommu->sub_handle) { - for (i = 0; i < (1 << irq_iommu->irte_mask); i++) - set_64bit((unsigned long *)(irte + i), 0); - rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask); - } + rc = clear_entries(irq_iommu); irq_iommu->iommu = NULL; irq_iommu->irte_index = 0; From f007e99c8e2e322b8331aba72414715119a2920d Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Sat, 23 May 2009 00:41:15 +0800 Subject: [PATCH 3/3] Intel-IOMMU, intr-remap: source-id checking To support domain-isolation usages, the platform hardware must be capable of uniquely identifying the requestor (source-id) for each interrupt message. Without source-id checking for interrupt remapping , a rouge guest/VM with assigned devices can launch interrupt attacks to bring down anothe guest/VM or the VMM itself. This patch adds source-id checking for interrupt remapping, and then really isolates interrupts for guests/VMs with assigned devices. Because PCI subsystem is not initialized yet when set up IOAPIC entries, use read_pci_config_byte to access PCI config space directly. Signed-off-by: Weidong Han Signed-off-by: David Woodhouse --- arch/x86/kernel/apic/io_apic.c | 6 ++ drivers/pci/intr_remapping.c | 120 ++++++++++++++++++++++++++++++++- drivers/pci/intr_remapping.h | 2 + include/linux/dmar.h | 11 +++ 4 files changed, 136 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b7a79207295e..4d0216fcb36c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1414,6 +1414,9 @@ int setup_ioapic_entry(int apic_id, int irq, irte.vector = vector; irte.dest_id = IRTE_DEST(destination); + /* Set source-id of interrupt request */ + set_ioapic_sid(&irte, apic_id); + modify_irte(irq, &irte); ir_entry->index2 = (index >> 15) & 0x1; @@ -3290,6 +3293,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); + /* Set source-id of interrupt request */ + set_msi_sid(&irte, pdev); + modify_irte(irq, &irte); msg->address_hi = MSI_ADDR_BASE_HI; diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 44025a0c2bb6..4f5b8712931f 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -10,6 +10,8 @@ #include #include "intr_remapping.h" #include +#include +#include "pci.h" static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; @@ -418,6 +420,91 @@ int free_irte(int irq) return rc; } +/* + * source validation type + */ +#define SVT_NO_VERIFY 0x0 /* no verification is required */ +#define SVT_VERIFY_SID_SQ 0x1 /* verify using SID and SQ fiels */ +#define SVT_VERIFY_BUS 0x2 /* verify bus of request-id */ + +/* + * source-id qualifier + */ +#define SQ_ALL_16 0x0 /* verify all 16 bits of request-id */ +#define SQ_13_IGNORE_1 0x1 /* verify most significant 13 bits, ignore + * the third least significant bit + */ +#define SQ_13_IGNORE_2 0x2 /* verify most significant 13 bits, ignore + * the second and third least significant bits + */ +#define SQ_13_IGNORE_3 0x3 /* verify most significant 13 bits, ignore + * the least three significant bits + */ + +/* + * set SVT, SQ and SID fields of irte to verify + * source ids of interrupt requests + */ +static void set_irte_sid(struct irte *irte, unsigned int svt, + unsigned int sq, unsigned int sid) +{ + irte->svt = svt; + irte->sq = sq; + irte->sid = sid; +} + +int set_ioapic_sid(struct irte *irte, int apic) +{ + int i; + u16 sid = 0; + + if (!irte) + return -1; + + for (i = 0; i < MAX_IO_APICS; i++) { + if (ir_ioapic[i].id == apic) { + sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn; + break; + } + } + + if (sid == 0) { + pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic); + return -1; + } + + set_irte_sid(irte, 1, 0, sid); + + return 0; +} + +int set_msi_sid(struct irte *irte, struct pci_dev *dev) +{ + struct pci_dev *bridge; + + if (!irte || !dev) + return -1; + + /* PCIe device or Root Complex integrated PCI device */ + if (dev->is_pcie || !dev->bus->parent) { + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, + (dev->bus->number << 8) | dev->devfn); + return 0; + } + + bridge = pci_find_upstream_pcie_bridge(dev); + if (bridge) { + if (bridge->is_pcie) /* this is a PCIE-to-PCI/PCIX bridge */ + set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, + (bridge->bus->number << 8) | dev->bus->number); + else /* this is a legacy PCI bridge */ + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, + (bridge->bus->number << 8) | bridge->devfn); + } + + return 0; +} + static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) { u64 addr; @@ -624,6 +711,35 @@ error: return -1; } +static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope, + struct intel_iommu *iommu) +{ + struct acpi_dmar_pci_path *path; + u8 bus; + int count; + + bus = scope->bus; + path = (struct acpi_dmar_pci_path *)(scope + 1); + count = (scope->length - sizeof(struct acpi_dmar_device_scope)) + / sizeof(struct acpi_dmar_pci_path); + + while (--count > 0) { + /* + * Access PCI directly due to the PCI + * subsystem isn't initialized yet. + */ + bus = read_pci_config_byte(bus, path->dev, path->fn, + PCI_SECONDARY_BUS); + path++; + } + + ir_ioapic[ir_ioapic_num].bus = bus; + ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->dev, path->fn); + ir_ioapic[ir_ioapic_num].iommu = iommu; + ir_ioapic[ir_ioapic_num].id = scope->enumeration_id; + ir_ioapic_num++; +} + static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, struct intel_iommu *iommu) { @@ -648,9 +764,7 @@ static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, " 0x%Lx\n", scope->enumeration_id, drhd->address); - ir_ioapic[ir_ioapic_num].iommu = iommu; - ir_ioapic[ir_ioapic_num].id = scope->enumeration_id; - ir_ioapic_num++; + ir_parse_one_ioapic_scope(scope, iommu); } start += scope->length; } diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h index ca48f0df8ac9..63a263c18415 100644 --- a/drivers/pci/intr_remapping.h +++ b/drivers/pci/intr_remapping.h @@ -3,6 +3,8 @@ struct ioapic_scope { struct intel_iommu *iommu; unsigned int id; + unsigned int bus; /* PCI bus number */ + unsigned int devfn; /* PCI devfn number */ }; #define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 1731fb5fd775..4a2b162c256a 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -126,6 +126,8 @@ extern int free_irte(int irq); extern int irq_remapped(int irq); extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); +extern int set_ioapic_sid(struct irte *irte, int apic); +extern int set_msi_sid(struct irte *irte, struct pci_dev *dev); #else static inline int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) { @@ -156,6 +158,15 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic) { return NULL; } +static inline int set_ioapic_sid(struct irte *irte, int apic) +{ + return 0; +} +static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev) +{ + return 0; +} + #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) #define disable_intr_remapping() (0)