diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h index 09d5f7fd9db1..3d52a5bbd857 100644 --- a/arch/ia64/include/asm/xen/interface.h +++ b/arch/ia64/include/asm/xen/interface.h @@ -67,6 +67,10 @@ #define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) #ifndef __ASSEMBLY__ +/* Explicitly size integers that represent pfns in the public interface + * with Xen so that we could have one ABI that works for 32 and 64 bit + * guests. */ +typedef unsigned long xen_pfn_t; /* Guest handles for primitive C types. */ __DEFINE_GUEST_HANDLE(uchar, unsigned char); __DEFINE_GUEST_HANDLE(uint, unsigned int); @@ -79,7 +83,6 @@ DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); DEFINE_GUEST_HANDLE(uint32_t); -typedef unsigned long xen_pfn_t; DEFINE_GUEST_HANDLE(xen_pfn_t); #define PRI_xen_pfn "lx" #endif @@ -265,6 +268,8 @@ typedef struct xen_callback xen_callback_t; #endif /* !__ASSEMBLY__ */ +#include + /* Size of the shared_info area (this is not related to page size). */ #define XSI_SHIFT 14 #define XSI_SIZE (1 << XSI_SHIFT) diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index cbf0c9d50b92..555f94d3637b 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -47,6 +47,10 @@ #endif #ifndef __ASSEMBLY__ +/* Explicitly size integers that represent pfns in the public interface + * with Xen so that on ARM we can have one ABI that works for 32 and 64 + * bit guests. */ +typedef unsigned long xen_pfn_t; /* Guest handles for primitive C types. */ __DEFINE_GUEST_HANDLE(uchar, unsigned char); __DEFINE_GUEST_HANDLE(uint, unsigned int); @@ -57,6 +61,7 @@ DEFINE_GUEST_HANDLE(long); DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); DEFINE_GUEST_HANDLE(uint32_t); +DEFINE_GUEST_HANDLE(xen_pfn_t); #endif #ifndef HYPERVISOR_VIRT_START @@ -121,6 +126,8 @@ struct arch_shared_info { #include "interface_64.h" #endif +#include + #ifndef __ASSEMBLY__ /* * The following is all CPU context. Note that the fpu_ctxt block is filled diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h index 1be1ab7d6a41..ee52fcac6f72 100644 --- a/arch/x86/include/asm/xen/swiotlb-xen.h +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -5,10 +5,12 @@ extern int xen_swiotlb; extern int __init pci_xen_swiotlb_detect(void); extern void __init pci_xen_swiotlb_init(void); +extern int pci_xen_swiotlb_init_late(void); #else #define xen_swiotlb (0) static inline int __init pci_xen_swiotlb_detect(void) { return 0; } static inline void __init pci_xen_swiotlb_init(void) { } +static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } #endif #endif /* _ASM_X86_SWIOTLB_XEN_H */ diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index ec57bd3818a4..7005ced5d1ad 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -6,8 +6,9 @@ #include #include +#include "xen-ops.h" -unsigned int xen_io_apic_read(unsigned apic, unsigned reg) +static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) { struct physdev_apic apic_op; int ret; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1fbe75a95f15..2d932c351f91 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -80,6 +80,8 @@ #include "smp.h" #include "multicalls.h" +#include + EXPORT_SYMBOL_GPL(hypercall_page); DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); @@ -1288,7 +1290,6 @@ asmlinkage void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; int rc; - pgd_t *pgd; if (!xen_start_info) return; @@ -1380,8 +1381,6 @@ asmlinkage void __init xen_start_kernel(void) acpi_numa = -1; #endif - pgd = (pgd_t *)xen_start_info->pt_base; - /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; @@ -1390,7 +1389,7 @@ asmlinkage void __init xen_start_kernel(void) early_boot_irqs_disabled = true; xen_raw_console_write("mapping kernel into physical memory\n"); - pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); + xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); @@ -1441,11 +1440,19 @@ asmlinkage void __init xen_start_kernel(void) const struct dom0_vga_console_info *info = (void *)((char *)xen_start_info + xen_start_info->console.dom0.info_off); + struct xen_platform_op op = { + .cmd = XENPF_firmware_info, + .interface_version = XENPF_INTERFACE_VERSION, + .u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS, + }; xen_init_vga(info, xen_start_info->console.dom0.info_size); xen_start_info->console.domU.mfn = 0; xen_start_info->console.domU.evtchn = 0; + if (HYPERVISOR_dom0_op(&op) == 0) + boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; + xen_init_apic(); /* Make sure ACS will be enabled */ diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 7a769b7526cb..5a16824cc2b3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -84,6 +84,7 @@ */ DEFINE_SPINLOCK(xen_reservation_lock); +#ifdef CONFIG_X86_32 /* * Identity map, in addition to plain kernel map. This needs to be * large enough to allocate page table pages to allocate the rest. @@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock); */ #define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); - +#endif #ifdef CONFIG_X86_64 /* l3 pud for userspace vsyscall mapping */ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; @@ -1176,13 +1177,6 @@ static void xen_exit_mmap(struct mm_struct *mm) static void xen_post_allocator_init(void); -static void __init xen_pagetable_init(void) -{ - paging_init(); - xen_setup_shared_info(); - xen_post_allocator_init(); -} - static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) { /* reserve the range used */ @@ -1197,6 +1191,87 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) } } +#ifdef CONFIG_X86_64 +static void __init xen_cleanhighmap(unsigned long vaddr, + unsigned long vaddr_end) +{ + unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; + pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr); + + /* NOTE: The loop is more greedy than the cleanup_highmap variant. + * We include the PMD passed in on _both_ boundaries. */ + for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE)); + pmd++, vaddr += PMD_SIZE) { + if (pmd_none(*pmd)) + continue; + if (vaddr < (unsigned long) _text || vaddr > kernel_end) + set_pmd(pmd, __pmd(0)); + } + /* In case we did something silly, we should crash in this function + * instead of somewhere later and be confusing. */ + xen_mc_flush(); +} +#endif +static void __init xen_pagetable_init(void) +{ +#ifdef CONFIG_X86_64 + unsigned long size; + unsigned long addr; +#endif + paging_init(); + xen_setup_shared_info(); +#ifdef CONFIG_X86_64 + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long new_mfn_list; + + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + + /* On 32-bit, we get zero so this never gets executed. */ + new_mfn_list = xen_revector_p2m_tree(); + if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { + /* using __ka address and sticking INVALID_P2M_ENTRY! */ + memset((void *)xen_start_info->mfn_list, 0xff, size); + + /* We should be in __ka space. */ + BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); + addr = xen_start_info->mfn_list; + /* We roundup to the PMD, which means that if anybody at this stage is + * using the __ka address of xen_start_info or xen_start_info->shared_info + * they are in going to crash. Fortunatly we have already revectored + * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ + size = roundup(size, PMD_SIZE); + xen_cleanhighmap(addr, addr + size); + + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + memblock_free(__pa(xen_start_info->mfn_list), size); + /* And revector! Bye bye old array */ + xen_start_info->mfn_list = new_mfn_list; + } else + goto skip; + } + /* At this stage, cleanup_highmap has already cleaned __ka space + * from _brk_limit way up to the max_pfn_mapped (which is the end of + * the ramdisk). We continue on, erasing PMD entries that point to page + * tables - do note that they are accessible at this stage via __va. + * For good measure we also round up to the PMD - which means that if + * anybody is using __ka address to the initial boot-stack - and try + * to use it - they are going to crash. The xen_start_info has been + * taken care of already in xen_setup_kernel_pagetable. */ + addr = xen_start_info->pt_base; + size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE); + + xen_cleanhighmap(addr, addr + size); + xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); +#ifdef DEBUG + /* This is superflous and is not neccessary, but you know what + * lets do it. The MODULES_VADDR -> MODULES_END should be clear of + * anything at this stage. */ + xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); +#endif +skip: +#endif + xen_post_allocator_init(); +} static void xen_write_cr2(unsigned long cr2) { this_cpu_read(xen_vcpu)->arch.cr2 = cr2; @@ -1652,7 +1727,7 @@ static void set_page_prot(void *addr, pgprot_t prot) if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) BUG(); } - +#ifdef CONFIG_X86_32 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) { unsigned pmdidx, pteidx; @@ -1703,7 +1778,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) set_page_prot(pmd, PAGE_KERNEL_RO); } - +#endif void __init xen_setup_machphys_mapping(void) { struct xen_machphys_mapping mapping; @@ -1731,7 +1806,20 @@ static void convert_pfn_mfn(void *v) for (i = 0; i < PTRS_PER_PTE; i++) pte[i] = xen_make_pte(pte[i].pte); } - +static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, + unsigned long addr) +{ + if (*pt_base == PFN_DOWN(__pa(addr))) { + set_page_prot((void *)addr, PAGE_KERNEL); + clear_page((void *)addr); + (*pt_base)++; + } + if (*pt_end == PFN_DOWN(__pa(addr))) { + set_page_prot((void *)addr, PAGE_KERNEL); + clear_page((void *)addr); + (*pt_end)--; + } +} /* * Set up the initial kernel pagetable. * @@ -1743,11 +1831,13 @@ static void convert_pfn_mfn(void *v) * of the physical mapping once some sort of allocator has been set * up. */ -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) +void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pud_t *l3; pmd_t *l2; + unsigned long addr[3]; + unsigned long pt_base, pt_end; + unsigned i; /* max_pfn_mapped is the last pfn mapped in the initial memory * mappings. Considering that on Xen after the kernel mappings we @@ -1755,32 +1845,53 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, * set max_pfn_mapped to the last real pfn mapped. */ max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); + pt_base = PFN_DOWN(__pa(xen_start_info->pt_base)); + pt_end = pt_base + xen_start_info->nr_pt_frames; + /* Zap identity mapping */ init_level4_pgt[0] = __pgd(0); /* Pre-constructed entries are in pfn, so convert to mfn */ + /* L4[272] -> level3_ident_pgt + * L4[511] -> level3_kernel_pgt */ convert_pfn_mfn(init_level4_pgt); + + /* L3_i[0] -> level2_ident_pgt */ convert_pfn_mfn(level3_ident_pgt); + /* L3_k[510] -> level2_kernel_pgt + * L3_i[511] -> level2_fixmap_pgt */ convert_pfn_mfn(level3_kernel_pgt); + /* We get [511][511] and have Xen's version of level2_kernel_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); - memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + addr[0] = (unsigned long)pgd; + addr[1] = (unsigned long)l3; + addr[2] = (unsigned long)l2; + /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: + * Both L4[272][0] and L4[511][511] have entries that point to the same + * L2 (PMD) tables. Meaning that if you modify it in __va space + * it will be also modified in the __ka space! (But if you just + * modify the PMD table to point to other PTE's or none, then you + * are OK - which is what cleanup_highmap does) */ + copy_page(level2_ident_pgt, l2); + /* Graft it onto L4[511][511] */ + copy_page(level2_kernel_pgt, l2); + /* Get [511][510] and graft that in level2_fixmap_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); - memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - /* Set up identity map */ - xen_map_identity_early(level2_ident_pgt, max_pfn); + copy_page(level2_fixmap_pgt, l2); + /* Note that we don't do anything with level1_fixmap_pgt which + * we don't need. */ /* Make pagetable pieces RO */ set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); @@ -1791,22 +1902,28 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, /* Unpin Xen-provided one */ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - /* Switch over */ - pgd = init_level4_pgt; - /* * At this stage there can be no user pgd, and no page * structure to attach it to, so make sure we just set kernel * pgd. */ xen_mc_batch(); - __xen_write_cr3(true, __pa(pgd)); + __xen_write_cr3(true, __pa(init_level4_pgt)); xen_mc_issue(PARAVIRT_LAZY_CPU); - memblock_reserve(__pa(xen_start_info->pt_base), - xen_start_info->nr_pt_frames * PAGE_SIZE); + /* We can't that easily rip out L3 and L2, as the Xen pagetables are + * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for + * the initial domain. For guests using the toolstack, they are in: + * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only + * rip out the [L4] (pgd), but for guests we shave off three pages. + */ + for (i = 0; i < ARRAY_SIZE(addr); i++) + check_pt_base(&pt_base, &pt_end, addr[i]); - return pgd; + /* Our (by three pages) smaller Xen pagetable that we are using */ + memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE); + /* Revector the xen_start_info */ + xen_start_info = (struct start_info *)__va(__pa(xen_start_info)); } #else /* !CONFIG_X86_64 */ static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); @@ -1831,8 +1948,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) */ swapper_kernel_pmd = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - memcpy(swapper_kernel_pmd, initial_kernel_pmd, - sizeof(pmd_t) * PTRS_PER_PMD); + copy_page(swapper_kernel_pmd, initial_kernel_pmd); swapper_pg_dir[KERNEL_PGD_BOUNDARY] = __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); @@ -1849,8 +1965,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) pv_mmu_ops.write_cr3 = &xen_write_cr3; } -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) +void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pmd_t *kernel_pmd; @@ -1862,11 +1977,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, 512*1024); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + copy_page(initial_kernel_pmd, kernel_pmd); xen_map_identity_early(initial_kernel_pmd, max_pfn); - memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + copy_page(initial_page_table, pgd); initial_page_table[KERNEL_PGD_BOUNDARY] = __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); @@ -1882,8 +1997,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, memblock_reserve(__pa(xen_start_info->pt_base), xen_start_info->nr_pt_frames * PAGE_SIZE); - - return initial_page_table; } #endif /* CONFIG_X86_64 */ @@ -2333,6 +2446,9 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long range; int err = 0; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -EINVAL; + prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == @@ -2351,8 +2467,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, if (err) goto out; - err = -EFAULT; - if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) + err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid); + if (err < 0) goto out; nr -= batch; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 72213da605f5..95fb2aa5927e 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -22,7 +22,7 @@ * * P2M_PER_PAGE depends on the architecture, as a mfn is always * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to - * 512 and 1024 entries respectively. + * 512 and 1024 entries respectively. * * In short, these structures contain the Machine Frame Number (MFN) of the PFN. * @@ -139,11 +139,11 @@ * / | ~0, ~0, .... | * | \---------------/ * | - * p2m_missing p2m_missing - * /------------------\ /------------\ - * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | - * | [p2m_mid_missing]+---->| ..., ~0 | - * \------------------/ \------------/ + * p2m_mid_missing p2m_missing + * /-----------------\ /------------\ + * | [p2m_missing] +---->| ~0, ~0, ~0 | + * | [p2m_missing] +---->| ..., ~0 | + * \-----------------/ \------------/ * * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) */ @@ -396,7 +396,85 @@ void __init xen_build_dynamic_phys_to_machine(void) m2p_override_init(); } +#ifdef CONFIG_X86_64 +#include +unsigned long __init xen_revector_p2m_tree(void) +{ + unsigned long va_start; + unsigned long va_end; + unsigned long pfn; + unsigned long pfn_free = 0; + unsigned long *mfn_list = NULL; + unsigned long size; + va_start = xen_start_info->mfn_list; + /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long), + * so make sure it is rounded up to that */ + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + va_end = va_start + size; + + /* If we were revectored already, don't do it again. */ + if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET) + return 0; + + mfn_list = alloc_bootmem_align(size, PAGE_SIZE); + if (!mfn_list) { + pr_warn("Could not allocate space for a new P2M tree!\n"); + return xen_start_info->mfn_list; + } + /* Fill it out with INVALID_P2M_ENTRY value */ + memset(mfn_list, 0xFF, size); + + for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { + unsigned topidx = p2m_top_index(pfn); + unsigned mididx; + unsigned long *mid_p; + + if (!p2m_top[topidx]) + continue; + + if (p2m_top[topidx] == p2m_mid_missing) + continue; + + mididx = p2m_mid_index(pfn); + mid_p = p2m_top[topidx][mididx]; + if (!mid_p) + continue; + if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) + continue; + + if ((unsigned long)mid_p == INVALID_P2M_ENTRY) + continue; + + /* The old va. Rebase it on mfn_list */ + if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { + unsigned long *new; + + if (pfn_free > (size / sizeof(unsigned long))) { + WARN(1, "Only allocated for %ld pages, but we want %ld!\n", + size / sizeof(unsigned long), pfn_free); + return 0; + } + new = &mfn_list[pfn_free]; + + copy_page(new, mid_p); + p2m_top[topidx][mididx] = &mfn_list[pfn_free]; + p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]); + + pfn_free += P2M_PER_PAGE; + + } + /* This should be the leafs allocated for identity from _brk. */ + } + return (unsigned long)mfn_list; + +} +#else +unsigned long __init xen_revector_p2m_tree(void) +{ + return 0; +} +#endif unsigned long get_phys_to_machine(unsigned long pfn) { unsigned topidx, mididx, idx; @@ -430,7 +508,7 @@ static void free_p2m_page(void *p) free_page((unsigned long)p); } -/* +/* * Fully allocate the p2m structure for a given pfn. We need to check * that both the top and mid levels are allocated, and make sure the * parallel mfn tree is kept in sync. We may race with other cpus, so diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 967633ad98c4..969570491c39 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -8,6 +8,14 @@ #include #include + +#include +#ifdef CONFIG_X86_64 +#include +#include +#endif +#include + int xen_swiotlb __read_mostly; static struct dma_map_ops xen_swiotlb_dma_ops = { @@ -34,34 +42,64 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { int __init pci_xen_swiotlb_detect(void) { + if (!xen_pv_domain()) + return 0; + /* If running as PV guest, either iommu=soft, or swiotlb=force will * activate this IOMMU. If running as PV privileged, activate it * irregardless. */ - if ((xen_initial_domain() || swiotlb || swiotlb_force) && - (xen_pv_domain())) + if ((xen_initial_domain() || swiotlb || swiotlb_force)) xen_swiotlb = 1; /* If we are running under Xen, we MUST disable the native SWIOTLB. * Don't worry about swiotlb_force flag activating the native, as * the 'swiotlb' flag is the only one turning it on. */ - if (xen_pv_domain()) - swiotlb = 0; + swiotlb = 0; +#ifdef CONFIG_X86_64 + /* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0 + * (so no iommu=X command line over-writes). + * Considering that PV guests do not want the *native SWIOTLB* but + * only Xen SWIOTLB it is not useful to us so set no_iommu=1 here. + */ + if (max_pfn > MAX_DMA32_PFN) + no_iommu = 1; +#endif return xen_swiotlb; } void __init pci_xen_swiotlb_init(void) { if (xen_swiotlb) { - xen_swiotlb_init(1); + xen_swiotlb_init(1, true /* early */); dma_ops = &xen_swiotlb_dma_ops; /* Make sure ACS will be enabled */ pci_request_acs(); } } + +int pci_xen_swiotlb_init_late(void) +{ + int rc; + + if (xen_swiotlb) + return 0; + + rc = xen_swiotlb_init(1, false /* late */); + if (rc) + return rc; + + dma_ops = &xen_swiotlb_dma_ops; + /* Make sure ACS will be enabled */ + pci_request_acs(); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late); + IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, - 0, + NULL, pci_xen_swiotlb_init, - 0); + NULL); diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index ffcf2615640b..0a7852483ffe 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -24,6 +24,7 @@ #include #include +#include "xen-ops.h" #define XEN_PLATFORM_ERR_MAGIC -1 #define XEN_PLATFORM_ERR_PROTOCOL -2 diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index e2d62d697b5d..8971a26d21ab 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -432,6 +432,24 @@ char * __init xen_memory_setup(void) * - mfn_list * - xen_start_info * See comment above "struct start_info" in + * We tried to make the the memblock_reserve more selective so + * that it would be clear what region is reserved. Sadly we ran + * in the problem wherein on a 64-bit hypervisor with a 32-bit + * initial domain, the pt_base has the cr3 value which is not + * neccessarily where the pagetable starts! As Jan put it: " + * Actually, the adjustment turns out to be correct: The page + * tables for a 32-on-64 dom0 get allocated in the order "first L1", + * "first L2", "first L3", so the offset to the page table base is + * indeed 2. When reading xen/include/public/xen.h's comment + * very strictly, this is not a violation (since there nothing is said + * that the first thing in the page table space is pointed to by + * pt_base; I admit that this seems to be implied though, namely + * do I think that it is implied that the page table space is the + * range [pt_base, pt_base + nt_pt_frames), whereas that + * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames), + * which - without a priori knowledge - the kernel would have + * difficulty to figure out)." - so lets just fall back to the + * easy way and reserve the whole region. */ memblock_reserve(__pa(xen_start_info->mfn_list), xen_start_info->pt_base - xen_start_info->mfn_list); diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c index 1cd7f4d11e29..6722e3733f02 100644 --- a/arch/x86/xen/vga.c +++ b/arch/x86/xen/vga.c @@ -35,6 +35,7 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) info->u.text_mode_3.font_height; break; + case XEN_VGATYPE_EFI_LFB: case XEN_VGATYPE_VESA_LFB: if (size < offsetof(struct dom0_vga_console_info, u.vesa_lfb.gbl_caps)) @@ -54,6 +55,12 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) screen_info->blue_pos = info->u.vesa_lfb.blue_pos; screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; + + if (info->video_type == XEN_VGATYPE_EFI_LFB) { + screen_info->orig_video_isVGA = VIDEO_TYPE_EFI; + break; + } + if (size >= offsetof(struct dom0_vga_console_info, u.vesa_lfb.gbl_caps) + sizeof(info->u.vesa_lfb.gbl_caps)) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index aaa7291c9259..7faed5869e5b 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -28,9 +28,61 @@ ENTRY(startup_xen) __FINIT .pushsection .text - .align PAGE_SIZE + .balign PAGE_SIZE ENTRY(hypercall_page) - .skip PAGE_SIZE +#define NEXT_HYPERCALL(x) \ + ENTRY(xen_hypercall_##x) \ + .skip 32 + +NEXT_HYPERCALL(set_trap_table) +NEXT_HYPERCALL(mmu_update) +NEXT_HYPERCALL(set_gdt) +NEXT_HYPERCALL(stack_switch) +NEXT_HYPERCALL(set_callbacks) +NEXT_HYPERCALL(fpu_taskswitch) +NEXT_HYPERCALL(sched_op_compat) +NEXT_HYPERCALL(platform_op) +NEXT_HYPERCALL(set_debugreg) +NEXT_HYPERCALL(get_debugreg) +NEXT_HYPERCALL(update_descriptor) +NEXT_HYPERCALL(ni) +NEXT_HYPERCALL(memory_op) +NEXT_HYPERCALL(multicall) +NEXT_HYPERCALL(update_va_mapping) +NEXT_HYPERCALL(set_timer_op) +NEXT_HYPERCALL(event_channel_op_compat) +NEXT_HYPERCALL(xen_version) +NEXT_HYPERCALL(console_io) +NEXT_HYPERCALL(physdev_op_compat) +NEXT_HYPERCALL(grant_table_op) +NEXT_HYPERCALL(vm_assist) +NEXT_HYPERCALL(update_va_mapping_otherdomain) +NEXT_HYPERCALL(iret) +NEXT_HYPERCALL(vcpu_op) +NEXT_HYPERCALL(set_segment_base) +NEXT_HYPERCALL(mmuext_op) +NEXT_HYPERCALL(xsm_op) +NEXT_HYPERCALL(nmi_op) +NEXT_HYPERCALL(sched_op) +NEXT_HYPERCALL(callback_op) +NEXT_HYPERCALL(xenoprof_op) +NEXT_HYPERCALL(event_channel_op) +NEXT_HYPERCALL(physdev_op) +NEXT_HYPERCALL(hvm_op) +NEXT_HYPERCALL(sysctl) +NEXT_HYPERCALL(domctl) +NEXT_HYPERCALL(kexec_op) +NEXT_HYPERCALL(tmem_op) /* 38 */ +ENTRY(xen_hypercall_rsvr) + .skip 320 +NEXT_HYPERCALL(mca) /* 48 */ +NEXT_HYPERCALL(arch_1) +NEXT_HYPERCALL(arch_2) +NEXT_HYPERCALL(arch_3) +NEXT_HYPERCALL(arch_4) +NEXT_HYPERCALL(arch_5) +NEXT_HYPERCALL(arch_6) + .balign PAGE_SIZE .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 202d4c150154..bb5a8105ea86 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); -pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); +void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_reserve_top(void); extern unsigned long xen_max_p2m_pfn; @@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void); void xen_unplug_emulated_devices(void); void __init xen_build_dynamic_phys_to_machine(void); +unsigned long __init xen_revector_p2m_tree(void); void xen_init_irq_ops(void); void xen_setup_timer(int cpu); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 682633bfe00f..05593d882023 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -635,9 +635,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) return; BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op)); - ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op, - npo.copy_prod); - BUG_ON(ret != 0); + gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod); while ((skb = __skb_dequeue(&rxq)) != NULL) { sco = (struct skb_cb_overlay *)skb->cb; @@ -1460,18 +1458,15 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk) static void xen_netbk_tx_action(struct xen_netbk *netbk) { unsigned nr_gops; - int ret; nr_gops = xen_netbk_tx_build_gops(netbk); if (nr_gops == 0) return; - ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, - netbk->tx_copy_ops, nr_gops); - BUG_ON(ret); + + gnttab_batch_copy(netbk->tx_copy_ops, nr_gops); xen_netbk_tx_submit(netbk); - } static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index def8d0b5620c..0aab85a51559 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -21,6 +21,7 @@ #include #include +#include #define INVALID_GRANT_REF (0) #define INVALID_EVTCHN (-1) @@ -236,7 +237,7 @@ static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn, return errno_to_pcibios_err(do_pci_op(pdev, &op)); } -struct pci_ops pcifront_bus_ops = { +static struct pci_ops pcifront_bus_ops = { .read = pcifront_bus_read, .write = pcifront_bus_write, }; @@ -668,7 +669,7 @@ static irqreturn_t pcifront_handler_aer(int irq, void *dev) schedule_pcifront_aer_op(pdev); return IRQ_HANDLED; } -static int pcifront_connect(struct pcifront_device *pdev) +static int pcifront_connect_and_init_dma(struct pcifront_device *pdev) { int err = 0; @@ -681,9 +682,13 @@ static int pcifront_connect(struct pcifront_device *pdev) dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n"); err = -EEXIST; } - spin_unlock(&pcifront_dev_lock); + if (!err && !swiotlb_nr_tbl()) { + err = pci_xen_swiotlb_init_late(); + if (err) + dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n"); + } return err; } @@ -842,10 +847,10 @@ static int __devinit pcifront_try_connect(struct pcifront_device *pdev) XenbusStateInitialised) goto out; - err = pcifront_connect(pdev); + err = pcifront_connect_and_init_dma(pdev); if (err) { xenbus_dev_fatal(pdev->xdev, err, - "Error connecting PCI Frontend"); + "Error setting up PCI Frontend"); goto out; } diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 1e456dca4f60..2944ff88fdc0 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 7595581d032c..c60d1629c916 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -373,11 +373,22 @@ static void unmask_evtchn(int port) { struct shared_info *s = HYPERVISOR_shared_info; unsigned int cpu = get_cpu(); + int do_hypercall = 0, evtchn_pending = 0; BUG_ON(!irqs_disabled()); - /* Slow path (hypercall) if this is a non-local port. */ - if (unlikely(cpu != cpu_from_evtchn(port))) { + if (unlikely((cpu != cpu_from_evtchn(port)))) + do_hypercall = 1; + else + evtchn_pending = sync_test_bit(port, &s->evtchn_pending[0]); + + if (unlikely(evtchn_pending && xen_hvm_domain())) + do_hypercall = 1; + + /* Slow path (hypercall) if this is a non-local port or if this is + * an hvm domain and an event is pending (hvm domains don't have + * their own implementation of irq_enable). */ + if (do_hypercall) { struct evtchn_unmask unmask = { .port = port }; (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); } else { @@ -390,7 +401,7 @@ static void unmask_evtchn(int port) * 'hw_resend_irq'. Just like a real IO-APIC we 'lose * the interrupt edge' if the channel is masked. */ - if (sync_test_bit(port, &s->evtchn_pending[0]) && + if (evtchn_pending && !sync_test_and_set_bit(port / BITS_PER_LONG, &vcpu_info->evtchn_pending_sel)) vcpu_info->evtchn_upcall_pending = 1; @@ -831,6 +842,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) struct irq_info *info = info_for_irq(irq); WARN_ON(info == NULL || info->type != IRQT_EVTCHN); } + irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); out: mutex_unlock(&irq_mapping_update_lock); diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 7f1241608489..5df9fd847b2e 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -446,7 +446,7 @@ static void mn_release(struct mmu_notifier *mn, spin_unlock(&priv->lock); } -struct mmu_notifier_ops gntdev_mmu_ops = { +static struct mmu_notifier_ops gntdev_mmu_ops = { .release = mn_release, .invalidate_page = mn_invl_page, .invalidate_range_start = mn_invl_range_start, diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 006726688baf..b2b0a375b348 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -47,6 +48,7 @@ #include #include #include +#include #include #include @@ -285,10 +287,9 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); -void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid, - unsigned long frame, int flags, - unsigned page_off, - unsigned length) +static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags, + unsigned page_off, unsigned length) { gnttab_shared.v2[ref].sub_page.frame = frame; gnttab_shared.v2[ref].sub_page.page_off = page_off; @@ -345,9 +346,9 @@ bool gnttab_subpage_grants_available(void) } EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available); -void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid, - int flags, domid_t trans_domid, - grant_ref_t trans_gref) +static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid, + int flags, domid_t trans_domid, + grant_ref_t trans_gref) { gnttab_shared.v2[ref].transitive.trans_domid = trans_domid; gnttab_shared.v2[ref].transitive.gref = trans_gref; @@ -823,6 +824,52 @@ unsigned int gnttab_max_grant_frames(void) } EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); +/* Handling of paged out grant targets (GNTST_eagain) */ +#define MAX_DELAY 256 +static inline void +gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status, + const char *func) +{ + unsigned delay = 1; + + do { + BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1)); + if (*status == GNTST_eagain) + msleep(delay++); + } while ((*status == GNTST_eagain) && (delay < MAX_DELAY)); + + if (delay >= MAX_DELAY) { + printk(KERN_ERR "%s: %s eagain grant\n", func, current->comm); + *status = GNTST_bad_page; + } +} + +void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count) +{ + struct gnttab_map_grant_ref *op; + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count)) + BUG(); + for (op = batch; op < batch + count; op++) + if (op->status == GNTST_eagain) + gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op, + &op->status, __func__); +} +EXPORT_SYMBOL_GPL(gnttab_batch_map); + +void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count) +{ + struct gnttab_copy *op; + + if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count)) + BUG(); + for (op = batch; op < batch + count; op++) + if (op->status == GNTST_eagain) + gnttab_retry_eagain_gop(GNTTABOP_copy, op, + &op->status, __func__); +} +EXPORT_SYMBOL_GPL(gnttab_batch_copy); + int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count) @@ -836,6 +883,12 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, if (ret) return ret; + /* Retry eagain maps */ + for (i = 0; i < count; i++) + if (map_ops[i].status == GNTST_eagain) + gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i, + &map_ops[i].status, __func__); + if (xen_feature(XENFEAT_auto_translated_physmap)) return ret; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index ccee0f16bcf8..ef6389580b8c 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages) */ static int gather_array(struct list_head *pagelist, unsigned nelem, size_t size, - void __user *data) + const void __user *data) { unsigned pageidx; void *pagedata; @@ -246,61 +246,117 @@ struct mmap_batch_state { domid_t domain; unsigned long va; struct vm_area_struct *vma; - int err; + /* A tristate: + * 0 for no errors + * 1 if at least one error has happened (and no + * -ENOENT errors have happened) + * -ENOENT if at least 1 -ENOENT has happened. + */ + int global_error; + /* An array for individual errors */ + int *err; - xen_pfn_t __user *user; + /* User-space mfn array to store errors in the second pass for V1. */ + xen_pfn_t __user *user_mfn; }; static int mmap_batch_fn(void *data, void *state) { xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; + int ret; - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, - st->vma->vm_page_prot, st->domain) < 0) { - *mfnp |= 0xf0000000U; - st->err++; + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, + st->vma->vm_page_prot, st->domain); + + /* Store error code for second pass. */ + *(st->err++) = ret; + + /* And see if it affects the global_error. */ + if (ret < 0) { + if (ret == -ENOENT) + st->global_error = -ENOENT; + else { + /* Record that at least one error has happened. */ + if (st->global_error == 0) + st->global_error = 1; + } } st->va += PAGE_SIZE; return 0; } -static int mmap_return_errors(void *data, void *state) +static int mmap_return_errors_v1(void *data, void *state) { xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; + int err = *(st->err++); - return put_user(*mfnp, st->user++); + /* + * V1 encodes the error codes in the 32bit top nibble of the + * mfn (with its known limitations vis-a-vis 64 bit callers). + */ + *mfnp |= (err == -ENOENT) ? + PRIVCMD_MMAPBATCH_PAGED_ERROR : + PRIVCMD_MMAPBATCH_MFN_ERROR; + return __put_user(*mfnp, st->user_mfn++); } static struct vm_operations_struct privcmd_vm_ops; -static long privcmd_ioctl_mmap_batch(void __user *udata) +static long privcmd_ioctl_mmap_batch(void __user *udata, int version) { int ret; - struct privcmd_mmapbatch m; + struct privcmd_mmapbatch_v2 m; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long nr_pages; LIST_HEAD(pagelist); + int *err_array = NULL; struct mmap_batch_state state; if (!xen_initial_domain()) return -EPERM; - if (copy_from_user(&m, udata, sizeof(m))) - return -EFAULT; + switch (version) { + case 1: + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) + return -EFAULT; + /* Returns per-frame error in m.arr. */ + m.err = NULL; + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr))) + return -EFAULT; + break; + case 2: + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2))) + return -EFAULT; + /* Returns per-frame error code in m.err. */ + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err)))) + return -EFAULT; + break; + default: + return -EINVAL; + } nr_pages = m.num; if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) return -EINVAL; - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), - m.arr); + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr); - if (ret || list_empty(&pagelist)) + if (ret) goto out; + if (list_empty(&pagelist)) { + ret = -EINVAL; + goto out; + } + + err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL); + if (err_array == NULL) { + ret = -ENOMEM; + goto out; + } down_write(&mm->mmap_sem); @@ -315,24 +371,37 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) goto out; } - state.domain = m.dom; - state.vma = vma; - state.va = m.addr; - state.err = 0; + state.domain = m.dom; + state.vma = vma; + state.va = m.addr; + state.global_error = 0; + state.err = err_array; - ret = traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, mmap_batch_fn, &state); + /* mmap_batch_fn guarantees ret == 0 */ + BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t), + &pagelist, mmap_batch_fn, &state)); up_write(&mm->mmap_sem); - if (state.err > 0) { - state.user = m.arr; + if (state.global_error && (version == 1)) { + /* Write back errors in second pass. */ + state.user_mfn = (xen_pfn_t *)m.arr; + state.err = err_array; ret = traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, - mmap_return_errors, &state); + &pagelist, mmap_return_errors_v1, &state); + } else if (version == 2) { + ret = __copy_to_user(m.err, err_array, m.num * sizeof(int)); + if (ret) + ret = -EFAULT; } + /* If we have not had any EFAULT-like global errors then set the global + * error to -ENOENT if necessary. */ + if ((ret == 0) && (state.global_error == -ENOENT)) + ret = -ENOENT; + out: + kfree(err_array); free_page_list(&pagelist); return ret; @@ -354,7 +423,11 @@ static long privcmd_ioctl(struct file *file, break; case IOCTL_PRIVCMD_MMAPBATCH: - ret = privcmd_ioctl_mmap_batch(udata); + ret = privcmd_ioctl_mmap_batch(udata, 1); + break; + + case IOCTL_PRIVCMD_MMAPBATCH_V2: + ret = privcmd_ioctl_mmap_batch(udata, 2); break; default: @@ -380,10 +453,6 @@ static struct vm_operations_struct privcmd_vm_ops = { static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) { - /* Unsupported for auto-translate guests. */ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return -ENOSYS; - /* DONTCOPY is essential for Xen because copy_page_range doesn't know * how to recreate these mappings */ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 4d519488d304..58db6df866ef 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -52,7 +52,7 @@ static unsigned long xen_io_tlb_nslabs; * Quick lookup value of the bus address of the IOTLB. */ -u64 start_dma_addr; +static u64 start_dma_addr; static dma_addr_t xen_phys_to_bus(phys_addr_t paddr) { @@ -144,31 +144,72 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs) } while (i < nslabs); return 0; } - -void __init xen_swiotlb_init(int verbose) +static unsigned long xen_set_nslabs(unsigned long nr_tbl) { - unsigned long bytes; - int rc = -ENOMEM; - unsigned long nr_tbl; - char *m = NULL; - unsigned int repeat = 3; - - nr_tbl = swiotlb_nr_tbl(); - if (nr_tbl) - xen_io_tlb_nslabs = nr_tbl; - else { + if (!nr_tbl) { xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); - } -retry: - bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; + } else + xen_io_tlb_nslabs = nr_tbl; + return xen_io_tlb_nslabs << IO_TLB_SHIFT; +} + +enum xen_swiotlb_err { + XEN_SWIOTLB_UNKNOWN = 0, + XEN_SWIOTLB_ENOMEM, + XEN_SWIOTLB_EFIXUP +}; + +static const char *xen_swiotlb_error(enum xen_swiotlb_err err) +{ + switch (err) { + case XEN_SWIOTLB_ENOMEM: + return "Cannot allocate Xen-SWIOTLB buffer\n"; + case XEN_SWIOTLB_EFIXUP: + return "Failed to get contiguous memory for DMA from Xen!\n"\ + "You either: don't have the permissions, do not have"\ + " enough free memory under 4GB, or the hypervisor memory"\ + " is too fragmented!"; + default: + break; + } + return ""; +} +int __ref xen_swiotlb_init(int verbose, bool early) +{ + unsigned long bytes, order; + int rc = -ENOMEM; + enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN; + unsigned int repeat = 3; + + xen_io_tlb_nslabs = swiotlb_nr_tbl(); +retry: + bytes = xen_set_nslabs(xen_io_tlb_nslabs); + order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT); /* * Get IO TLB memory from any location. */ - xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes)); + if (early) + xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes)); + else { +#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) +#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) + while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { + xen_io_tlb_start = (void *)__get_free_pages(__GFP_NOWARN, order); + if (xen_io_tlb_start) + break; + order--; + } + if (order != get_order(bytes)) { + pr_warn("Warning: only able to allocate %ld MB " + "for software IO TLB\n", (PAGE_SIZE << order) >> 20); + xen_io_tlb_nslabs = SLABS_PER_PAGE << order; + bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; + } + } if (!xen_io_tlb_start) { - m = "Cannot allocate Xen-SWIOTLB buffer!\n"; + m_ret = XEN_SWIOTLB_ENOMEM; goto error; } xen_io_tlb_end = xen_io_tlb_start + bytes; @@ -179,17 +220,22 @@ retry: bytes, xen_io_tlb_nslabs); if (rc) { - free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes)); - m = "Failed to get contiguous memory for DMA from Xen!\n"\ - "You either: don't have the permissions, do not have"\ - " enough free memory under 4GB, or the hypervisor memory"\ - "is too fragmented!"; + if (early) + free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes)); + else { + free_pages((unsigned long)xen_io_tlb_start, order); + xen_io_tlb_start = NULL; + } + m_ret = XEN_SWIOTLB_EFIXUP; goto error; } start_dma_addr = xen_virt_to_bus(xen_io_tlb_start); - swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); - - return; + if (early) { + swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); + rc = 0; + } else + rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs); + return rc; error: if (repeat--) { xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */ @@ -198,10 +244,13 @@ error: (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20); goto retry; } - xen_raw_printk("%s (rc:%d)", m, rc); - panic("%s (rc:%d)", m, rc); + pr_err("%s (rc:%d)", xen_swiotlb_error(m_ret), rc); + if (early) + panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc); + else + free_pages((unsigned long)xen_io_tlb_start, order); + return rc; } - void * xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags, @@ -466,14 +515,6 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, } EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg_attrs); -int -xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - return xen_swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); -} -EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg); - /* * Unmap a set of streaming mode DMA translations. Again, cpu read rules * concerning calls here are the same as for swiotlb_unmap_page() above. @@ -494,14 +535,6 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, } EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs); -void -xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - return xen_swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); -} -EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg); - /* * Make physical memory consistent for a set of streaming mode DMA translations * after a transfer. diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index fdb6d229c9bb..5e5ad7e28858 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -114,7 +114,7 @@ static void xen_sysfs_version_destroy(void) /* UUID */ -static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) +static ssize_t uuid_show_fallback(struct hyp_sysfs_attr *attr, char *buffer) { char *vm, *val; int ret; @@ -135,6 +135,17 @@ static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) return ret; } +static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + xen_domain_handle_t uuid; + int ret; + ret = HYPERVISOR_xen_version(XENVER_guest_handle, uuid); + if (ret) + return uuid_show_fallback(attr, buffer); + ret = sprintf(buffer, "%pU\n", uuid); + return ret; +} + HYPERVISOR_ATTR_RO(uuid); static int __init xen_sysfs_uuid_init(void) diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 89f264c67420..144564e5eb29 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -21,6 +21,7 @@ #include #include #include +#include #define TMEM_CONTROL 0 #define TMEM_NEW_POOL 1 diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 92ff01dbeb10..961d664e2d2f 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -362,6 +362,7 @@ static int __devinit pcistub_init_device(struct pci_dev *dev) else { dev_dbg(&dev->dev, "reseting (FLR, D3, etc) the device\n"); __pci_reset_function_locked(dev); + pci_restore_state(dev); } /* Now disable the device (this also ensures some private device * data is setup before we export) @@ -681,14 +682,14 @@ static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev) dev_err(&dev->dev, DRV_NAME " device is not connected or owned" " by HVM, kill it\n"); kill_domain_by_device(psdev); - goto release; + goto end; } if (!test_bit(_XEN_PCIB_AERHANDLER, (unsigned long *)&psdev->pdev->sh_info->flags)) { dev_err(&dev->dev, "guest with no AER driver should have been killed\n"); - goto release; + goto end; } result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result); @@ -698,9 +699,9 @@ static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev) "No AER slot_reset service or disconnected!\n"); kill_domain_by_device(psdev); } -release: - pcistub_device_put(psdev); end: + if (psdev) + pcistub_device_put(psdev); up_write(&pcistub_sem); return result; @@ -739,14 +740,14 @@ static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev) dev_err(&dev->dev, DRV_NAME " device is not connected or owned" " by HVM, kill it\n"); kill_domain_by_device(psdev); - goto release; + goto end; } if (!test_bit(_XEN_PCIB_AERHANDLER, (unsigned long *)&psdev->pdev->sh_info->flags)) { dev_err(&dev->dev, "guest with no AER driver should have been killed\n"); - goto release; + goto end; } result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result); @@ -756,9 +757,9 @@ static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev) "No AER mmio_enabled service or disconnected!\n"); kill_domain_by_device(psdev); } -release: - pcistub_device_put(psdev); end: + if (psdev) + pcistub_device_put(psdev); up_write(&pcistub_sem); return result; } @@ -797,7 +798,7 @@ static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev, dev_err(&dev->dev, DRV_NAME " device is not connected or owned" " by HVM, kill it\n"); kill_domain_by_device(psdev); - goto release; + goto end; } /*Guest owns the device yet no aer handler regiested, kill guest*/ @@ -805,7 +806,7 @@ static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev, (unsigned long *)&psdev->pdev->sh_info->flags)) { dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n"); kill_domain_by_device(psdev); - goto release; + goto end; } result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result); @@ -815,9 +816,9 @@ static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev, "No AER error_detected service or disconnected!\n"); kill_domain_by_device(psdev); } -release: - pcistub_device_put(psdev); end: + if (psdev) + pcistub_device_put(psdev); up_write(&pcistub_sem); return result; } @@ -851,7 +852,7 @@ static void xen_pcibk_error_resume(struct pci_dev *dev) dev_err(&dev->dev, DRV_NAME " device is not connected or owned" " by HVM, kill it\n"); kill_domain_by_device(psdev); - goto release; + goto end; } if (!test_bit(_XEN_PCIB_AERHANDLER, @@ -859,13 +860,13 @@ static void xen_pcibk_error_resume(struct pci_dev *dev) dev_err(&dev->dev, "guest with no AER driver should have been killed\n"); kill_domain_by_device(psdev); - goto release; + goto end; } common_process(psdev, 1, XEN_PCI_OP_aer_resume, PCI_ERS_RESULT_RECOVERED); -release: - pcistub_device_put(psdev); end: + if (psdev) + pcistub_device_put(psdev); up_write(&pcistub_sem); return; } @@ -897,17 +898,41 @@ static inline int str_to_slot(const char *buf, int *domain, int *bus, int *slot, int *func) { int err; + char wc = '*'; err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func); - if (err == 4) + switch (err) { + case 3: + *func = -1; + err = sscanf(buf, " %x:%x:%x.%c", domain, bus, slot, &wc); + break; + case 2: + *slot = *func = -1; + err = sscanf(buf, " %x:%x:*.%c", domain, bus, &wc); + if (err >= 2) + ++err; + break; + } + if (err == 4 && wc == '*') return 0; else if (err < 0) return -EINVAL; /* try again without domain */ *domain = 0; + wc = '*'; err = sscanf(buf, " %x:%x.%x", bus, slot, func); - if (err == 3) + switch (err) { + case 2: + *func = -1; + err = sscanf(buf, " %x:%x.%c", bus, slot, &wc); + break; + case 1: + *slot = *func = -1; + err = sscanf(buf, " %x:*.%c", bus, &wc) + 1; + break; + } + if (err == 3 && wc == '*') return 0; return -EINVAL; @@ -930,6 +955,19 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func) { struct pcistub_device_id *pci_dev_id; unsigned long flags; + int rc = 0; + + if (slot < 0) { + for (slot = 0; !rc && slot < 32; ++slot) + rc = pcistub_device_id_add(domain, bus, slot, func); + return rc; + } + + if (func < 0) { + for (func = 0; !rc && func < 8; ++func) + rc = pcistub_device_id_add(domain, bus, slot, func); + return rc; + } pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL); if (!pci_dev_id) @@ -952,15 +990,15 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func) static int pcistub_device_id_remove(int domain, int bus, int slot, int func) { struct pcistub_device_id *pci_dev_id, *t; - int devfn = PCI_DEVFN(slot, func); int err = -ENOENT; unsigned long flags; spin_lock_irqsave(&device_ids_lock, flags); list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) { - if (pci_dev_id->domain == domain - && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) { + if (pci_dev_id->domain == domain && pci_dev_id->bus == bus + && (slot < 0 || PCI_SLOT(pci_dev_id->devfn) == slot) + && (func < 0 || PCI_FUNC(pci_dev_id->devfn) == func)) { /* Don't break; here because it's possible the same * slot could be in the list more than once */ @@ -987,7 +1025,7 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg, struct config_field *field; psdev = pcistub_device_find(domain, bus, slot, func); - if (!psdev || !psdev->dev) { + if (!psdev) { err = -ENODEV; goto out; } @@ -1011,6 +1049,8 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg, if (err) kfree(field); out: + if (psdev) + pcistub_device_put(psdev); return err; } @@ -1115,10 +1155,9 @@ static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, err = str_to_slot(buf, &domain, &bus, &slot, &func); if (err) - goto out; + return err; psdev = pcistub_device_find(domain, bus, slot, func); - if (!psdev) goto out; @@ -1134,6 +1173,8 @@ static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, if (dev_data->isr_on) dev_data->ack_intr = 1; out: + if (psdev) + pcistub_device_put(psdev); if (!err) err = count; return err; @@ -1216,15 +1257,16 @@ static ssize_t permissive_add(struct device_driver *drv, const char *buf, err = str_to_slot(buf, &domain, &bus, &slot, &func); if (err) goto out; + if (slot < 0 || func < 0) { + err = -EINVAL; + goto out; + } psdev = pcistub_device_find(domain, bus, slot, func); if (!psdev) { err = -ENODEV; goto out; } - if (!psdev->dev) { - err = -ENODEV; - goto release; - } + dev_data = pci_get_drvdata(psdev->dev); /* the driver data for a device should never be null at this point */ if (!dev_data) { @@ -1297,17 +1339,51 @@ static int __init pcistub_init(void) if (pci_devs_to_hide && *pci_devs_to_hide) { do { + char wc = '*'; + parsed = 0; err = sscanf(pci_devs_to_hide + pos, " (%x:%x:%x.%x) %n", &domain, &bus, &slot, &func, &parsed); - if (err != 4) { + switch (err) { + case 3: + func = -1; + err = sscanf(pci_devs_to_hide + pos, + " (%x:%x:%x.%c) %n", + &domain, &bus, &slot, &wc, + &parsed); + break; + case 2: + slot = func = -1; + err = sscanf(pci_devs_to_hide + pos, + " (%x:%x:*.%c) %n", + &domain, &bus, &wc, &parsed) + 1; + break; + } + + if (err != 4 || wc != '*') { domain = 0; + wc = '*'; err = sscanf(pci_devs_to_hide + pos, " (%x:%x.%x) %n", &bus, &slot, &func, &parsed); - if (err != 3) + switch (err) { + case 2: + func = -1; + err = sscanf(pci_devs_to_hide + pos, + " (%x:%x.%c) %n", + &bus, &slot, &wc, + &parsed); + break; + case 1: + slot = func = -1; + err = sscanf(pci_devs_to_hide + pos, + " (%x:*.%c) %n", + &bus, &wc, &parsed) + 1; + break; + } + if (err != 3 || wc != '*') goto parse_error; } diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index b3e146edb51d..bcf3ba4a6ec1 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -490,8 +490,7 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, op.host_addr = arbitrary_virt_to_machine(pte).maddr; - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) - BUG(); + gnttab_batch_map(&op, 1); if (op.status != GNTST_okay) { free_vm_area(area); @@ -572,8 +571,7 @@ int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref, dev->otherend_id); - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) - BUG(); + gnttab_batch_map(&op, 1); if (op.status != GNTST_okay) { xenbus_dev_fatal(dev, op.status, diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 52fe7ad07666..c5aa55c5d371 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -224,7 +224,7 @@ int xb_init_comms(void) int err; err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, 0, "xenbus", &xb_waitq); - if (err <= 0) { + if (err < 0) { printk(KERN_ERR "XENBUS request irq failed %i\n", err); return err; } diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c index be738c43104b..d73000800762 100644 --- a/drivers/xen/xenbus/xenbus_dev_backend.c +++ b/drivers/xen/xenbus/xenbus_dev_backend.c @@ -107,7 +107,7 @@ static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -const struct file_operations xenbus_backend_fops = { +static const struct file_operations xenbus_backend_fops = { .open = xenbus_backend_open, .mmap = xenbus_backend_mmap, .unlocked_ioctl = xenbus_backend_ioctl, diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index b793723e724d..038b71dbf03c 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -324,8 +324,8 @@ static int cmp_dev(struct device *dev, void *data) return 0; } -struct xenbus_device *xenbus_device_find(const char *nodename, - struct bus_type *bus) +static struct xenbus_device *xenbus_device_find(const char *nodename, + struct bus_type *bus) { struct xb_find_info info = { .dev = NULL, .nodename = nodename }; @@ -719,17 +719,47 @@ static int __init xenstored_local_init(void) return err; } +enum xenstore_init { + UNKNOWN, + PV, + HVM, + LOCAL, +}; static int __init xenbus_init(void) { int err = 0; + enum xenstore_init usage = UNKNOWN; + uint64_t v = 0; if (!xen_domain()) return -ENODEV; xenbus_ring_ops_init(); - if (xen_hvm_domain()) { - uint64_t v = 0; + if (xen_pv_domain()) + usage = PV; + if (xen_hvm_domain()) + usage = HVM; + if (xen_hvm_domain() && xen_initial_domain()) + usage = LOCAL; + if (xen_pv_domain() && !xen_start_info->store_evtchn) + usage = LOCAL; + if (xen_pv_domain() && xen_start_info->store_evtchn) + xenstored_ready = 1; + + switch (usage) { + case LOCAL: + err = xenstored_local_init(); + if (err) + goto out_error; + xen_store_interface = mfn_to_virt(xen_store_mfn); + break; + case PV: + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_mfn = xen_start_info->store_mfn; + xen_store_interface = mfn_to_virt(xen_store_mfn); + break; + case HVM: err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); if (err) goto out_error; @@ -738,18 +768,12 @@ static int __init xenbus_init(void) if (err) goto out_error; xen_store_mfn = (unsigned long)v; - xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); - } else { - xen_store_evtchn = xen_start_info->store_evtchn; - xen_store_mfn = xen_start_info->store_mfn; - if (xen_store_evtchn) - xenstored_ready = 1; - else { - err = xenstored_local_init(); - if (err) - goto out_error; - } - xen_store_interface = mfn_to_virt(xen_store_mfn); + xen_store_interface = + ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); + break; + default: + pr_warn("Xenstore state unknown\n"); + break; } /* Initialize the interface to xenstore. */ diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index a31b54d48839..3159a37d966d 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -21,6 +21,7 @@ #include #include #include +#include #include diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index bce15cf4a8df..131dec04794e 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include "xenbus_comms.h" @@ -622,7 +623,7 @@ static void xs_reset_watches(void) { int err, supported = 0; - if (!xen_hvm_domain()) + if (!xen_hvm_domain() || xen_initial_domain()) return; err = xenbus_scanf(XBT_NIL, "control", diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index e872526fdc5f..8d08b3ed406d 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -25,6 +25,7 @@ extern int swiotlb_force; extern void swiotlb_init(int verbose); extern void swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); extern unsigned long swiotlb_nr_tbl(void); +extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); /* * Enumeration for sync targets diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index f19fff8650e9..aecee9d112cb 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -190,4 +190,16 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_map_grant_ref *kunmap_ops, struct page **pages, unsigned int count); +/* Perform a batch of grant map/copy operations. Retry every batch slot + * for which the hypervisor returns GNTST_eagain. This is typically due + * to paged out target frames. + * + * Will retry for 1, 2, ... 255 ms, i.e. 256 times during 32 seconds. + * + * Return value in each iand every status field of the batch guaranteed + * to not be GNTST_eagain. + */ +void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count); +void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count); + #endif /* __ASM_GNTTAB_H__ */ diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h index a17d84433e6a..f9f8b975ae74 100644 --- a/include/xen/interface/grant_table.h +++ b/include/xen/interface/grant_table.h @@ -338,7 +338,7 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table); #define GNTTABOP_transfer 4 struct gnttab_transfer { /* IN parameters. */ - unsigned long mfn; + xen_pfn_t mfn; domid_t domid; grant_ref_t ref; /* OUT parameters. */ @@ -375,7 +375,7 @@ struct gnttab_copy { struct { union { grant_ref_t ref; - unsigned long gmfn; + xen_pfn_t gmfn; } u; domid_t domid; uint16_t offset; @@ -519,7 +519,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version); #define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ #define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ #define GNTST_bad_page (-9) /* Specified page was invalid for op. */ -#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary */ +#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ +#define GNTST_address_too_big (-11) /* transfer page address too large. */ +#define GNTST_eagain (-12) /* Operation not done; try again. */ #define GNTTABOP_error_msgs { \ "okay", \ @@ -532,7 +534,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version); "no spare translation slot in the I/O MMU", \ "permission denied", \ "bad page", \ - "copy arguments cross page boundary" \ + "copy arguments cross page boundary", \ + "page address size too large", \ + "operation not done; try again" \ } #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index eac3ce153719..d8e33a93ea4d 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -31,7 +31,7 @@ struct xen_memory_reservation { * OUT: GMFN bases of extents that were allocated * (NB. This command also updates the mach_to_phys translation table) */ - GUEST_HANDLE(ulong) extent_start; + GUEST_HANDLE(xen_pfn_t) extent_start; /* Number of extents, and size/alignment of each (2^extent_order pages). */ unsigned long nr_extents; @@ -130,7 +130,7 @@ struct xen_machphys_mfn_list { * any large discontiguities in the machine address space, 2MB gaps in * the machphys table will be represented by an MFN base of zero. */ - GUEST_HANDLE(ulong) extent_start; + GUEST_HANDLE(xen_pfn_t) extent_start; /* * Number of extents written to the above array. This will be smaller @@ -163,6 +163,9 @@ struct xen_add_to_physmap { /* Which domain to change the mapping for. */ domid_t domid; + /* Number of pages to go through for gmfn_range */ + uint16_t size; + /* Source mapping space. */ #define XENMAPSPACE_shared_info 0 /* shared info page */ #define XENMAPSPACE_grant_table 1 /* grant table page */ @@ -172,7 +175,7 @@ struct xen_add_to_physmap { unsigned long idx; /* GPFN where the source mapping page should appear. */ - unsigned long gpfn; + xen_pfn_t gpfn; }; DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h index 61fa66160983..54ad6f9e4725 100644 --- a/include/xen/interface/platform.h +++ b/include/xen/interface/platform.h @@ -54,7 +54,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime_t); #define XENPF_add_memtype 31 struct xenpf_add_memtype { /* IN variables. */ - unsigned long mfn; + xen_pfn_t mfn; uint64_t nr_mfns; uint32_t type; /* OUT variables. */ @@ -84,7 +84,7 @@ struct xenpf_read_memtype { /* IN variables. */ uint32_t reg; /* OUT variables. */ - unsigned long mfn; + xen_pfn_t mfn; uint64_t nr_mfns; uint32_t type; }; @@ -112,6 +112,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_platform_quirk_t); #define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ #define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ #define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ +#define XEN_FW_KBD_SHIFT_FLAGS 5 /* Int16, Fn02: Get keyboard shift flags. */ struct xenpf_firmware_info { /* IN variables. */ uint32_t type; @@ -142,6 +143,8 @@ struct xenpf_firmware_info { /* must refer to 128-byte buffer */ GUEST_HANDLE(uchar) edid; } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ + + uint8_t kbd_shift_flags; /* XEN_FW_KBD_SHIFT_FLAGS */ } u; }; DEFINE_GUEST_HANDLE_STRUCT(xenpf_firmware_info_t); diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h index e8b6519d47e9..dd58cf5ea3e4 100644 --- a/include/xen/interface/version.h +++ b/include/xen/interface/version.h @@ -60,4 +60,7 @@ struct xen_feature_info { /* arg == NULL; returns host memory page size. */ #define XENVER_pagesize 7 +/* arg == xen_domain_handle_t. */ +#define XENVER_guest_handle 8 + #endif /* __XEN_PUBLIC_VERSION_H__ */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 0801468f9abe..886a5d80a18f 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -10,7 +10,6 @@ #define __XEN_PUBLIC_XEN_H__ #include -#include /* * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS). @@ -190,7 +189,7 @@ struct mmuext_op { unsigned int cmd; union { /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ - unsigned long mfn; + xen_pfn_t mfn; /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ unsigned long linear_addr; } arg1; @@ -430,11 +429,11 @@ struct start_info { unsigned long nr_pages; /* Total pages allocated to this domain. */ unsigned long shared_info; /* MACHINE address of shared info struct. */ uint32_t flags; /* SIF_xxx flags. */ - unsigned long store_mfn; /* MACHINE page number of shared page. */ + xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ uint32_t store_evtchn; /* Event channel for store communication. */ union { struct { - unsigned long mfn; /* MACHINE page number of console page. */ + xen_pfn_t mfn; /* MACHINE page number of console page. */ uint32_t evtchn; /* Event channel for console page. */ } domU; struct { @@ -455,6 +454,7 @@ struct dom0_vga_console_info { uint8_t video_type; #define XEN_VGATYPE_TEXT_MODE_3 0x03 #define XEN_VGATYPE_VESA_LFB 0x23 +#define XEN_VGATYPE_EFI_LFB 0x70 union { struct { diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h index 17857fb4d550..a85316811d79 100644 --- a/include/xen/privcmd.h +++ b/include/xen/privcmd.h @@ -35,8 +35,7 @@ #include #include - -typedef unsigned long xen_pfn_t; +#include struct privcmd_hypercall { __u64 op; @@ -59,13 +58,33 @@ struct privcmd_mmapbatch { int num; /* number of pages to populate */ domid_t dom; /* target domain */ __u64 addr; /* virtual address */ - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ + xen_pfn_t __user *arr; /* array of mfns - or'd with + PRIVCMD_MMAPBATCH_*_ERROR on err */ +}; + +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U +#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U + +struct privcmd_mmapbatch_v2 { + unsigned int num; /* number of pages to populate */ + domid_t dom; /* target domain */ + __u64 addr; /* virtual address */ + const xen_pfn_t __user *arr; /* array of mfns */ + int __user *err; /* array of error codes */ }; /* * @cmd: IOCTL_PRIVCMD_HYPERCALL * @arg: &privcmd_hypercall_t * Return: Value returned from execution of the specified hypercall. + * + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2 + * @arg: &struct privcmd_mmapbatch_v2 + * Return: 0 on success (i.e., arg->err contains valid error codes for + * each frame). On an error other than a failed frame remap, -1 is + * returned and errno is set to EINVAL, EFAULT etc. As an exception, + * if the operation was otherwise successful but any frame failed with + * -ENOENT, then -1 is returned and errno is set to ENOENT. */ #define IOCTL_PRIVCMD_HYPERCALL \ _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall)) @@ -73,5 +92,7 @@ struct privcmd_mmapbatch { _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap)) #define IOCTL_PRIVCMD_MMAPBATCH \ _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \ + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2)) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index 4f4d449f00f6..de8bcc641c49 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -3,7 +3,7 @@ #include -extern void xen_swiotlb_init(int verbose); +extern int xen_swiotlb_init(int verbose, bool early); extern void *xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, @@ -23,15 +23,6 @@ extern dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, extern void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs); -/* -extern int -xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, - enum dma_data_direction dir); - -extern void -xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, - enum dma_data_direction dir); -*/ extern int xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 45bc1f83a5ad..f114bf6a8e13 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -170,7 +170,7 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. */ -void __init +static void __init swiotlb_init_with_default_size(size_t default_size, int verbose) { unsigned long bytes; @@ -206,8 +206,9 @@ swiotlb_init(int verbose) int swiotlb_late_init_with_default_size(size_t default_size) { - unsigned long i, bytes, req_nslabs = io_tlb_nslabs; + unsigned long bytes, req_nslabs = io_tlb_nslabs; unsigned int order; + int rc = 0; if (!io_tlb_nslabs) { io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); @@ -229,16 +230,32 @@ swiotlb_late_init_with_default_size(size_t default_size) order--; } - if (!io_tlb_start) - goto cleanup1; - + if (!io_tlb_start) { + io_tlb_nslabs = req_nslabs; + return -ENOMEM; + } if (order != get_order(bytes)) { printk(KERN_WARNING "Warning: only able to allocate %ld MB " "for software IO TLB\n", (PAGE_SIZE << order) >> 20); io_tlb_nslabs = SLABS_PER_PAGE << order; - bytes = io_tlb_nslabs << IO_TLB_SHIFT; } + rc = swiotlb_late_init_with_tbl(io_tlb_start, io_tlb_nslabs); + if (rc) + free_pages((unsigned long)io_tlb_start, order); + return rc; +} + +int +swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) +{ + unsigned long i, bytes; + + bytes = nslabs << IO_TLB_SHIFT; + + io_tlb_nslabs = nslabs; + io_tlb_start = tlb; io_tlb_end = io_tlb_start + bytes; + memset(io_tlb_start, 0, bytes); /* @@ -288,10 +305,8 @@ cleanup3: io_tlb_list = NULL; cleanup2: io_tlb_end = NULL; - free_pages((unsigned long)io_tlb_start, order); io_tlb_start = NULL; -cleanup1: - io_tlb_nslabs = req_nslabs; + io_tlb_nslabs = 0; return -ENOMEM; }