From aecb2016c90a1b620e21c9e143afbdc9666cce52 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 1 Sep 2020 10:33:24 +0200 Subject: [PATCH 1/3] xen/balloon: add header guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to protect against the header being included multiple times on the same compilation unit. Signed-off-by: Roger Pau Monné Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20200901083326.21264-2-roger.pau@citrix.com Signed-off-by: Juergen Gross --- include/xen/balloon.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/xen/balloon.h b/include/xen/balloon.h index 6fb95aa19405..6dbdb0b3fd03 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -2,6 +2,8 @@ /****************************************************************************** * Xen balloon functionality */ +#ifndef _XEN_BALLOON_H +#define _XEN_BALLOON_H #define RETRY_UNLIMITED 0 @@ -34,3 +36,5 @@ static inline void xen_balloon_init(void) { } #endif + +#endif /* _XEN_BALLOON_H */ From 4533d3aed857c558d6aabd00d0cb04100c5a2258 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 1 Sep 2020 10:33:25 +0200 Subject: [PATCH 2/3] memremap: rename MEMORY_DEVICE_DEVDAX to MEMORY_DEVICE_GENERIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is in preparation for the logic behind MEMORY_DEVICE_DEVDAX also being used by non DAX devices. No functional change intended. Signed-off-by: Roger Pau Monné Reviewed-by: Ira Weiny Acked-by: Andrew Morton Reviewed-by: Pankaj Gupta Link: https://lore.kernel.org/r/20200901083326.21264-3-roger.pau@citrix.com Signed-off-by: Juergen Gross --- drivers/dax/device.c | 2 +- include/linux/memremap.h | 9 ++++----- mm/memremap.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 4c0af2eb7e19..1e89513f3c59 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -429,7 +429,7 @@ int dev_dax_probe(struct device *dev) return -EBUSY; } - dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX; + dev_dax->pgmap.type = MEMORY_DEVICE_GENERIC; addr = devm_memremap_pages(dev, &dev_dax->pgmap); if (IS_ERR(addr)) return PTR_ERR(addr); diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 5f5b2df06e61..e5862746751b 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -46,11 +46,10 @@ struct vmem_altmap { * wakeup is used to coordinate physical address space management (ex: * fs truncate/hole punch) vs pinned pages (ex: device dma). * - * MEMORY_DEVICE_DEVDAX: + * MEMORY_DEVICE_GENERIC: * Host memory that has similar access semantics as System RAM i.e. DMA - * coherent and supports page pinning. In contrast to - * MEMORY_DEVICE_FS_DAX, this memory is access via a device-dax - * character device. + * coherent and supports page pinning. This is for example used by DAX devices + * that expose memory using a character device. * * MEMORY_DEVICE_PCI_P2PDMA: * Device memory residing in a PCI BAR intended for use with Peer-to-Peer @@ -60,7 +59,7 @@ enum memory_type { /* 0 is reserved to catch uninitialized type fields */ MEMORY_DEVICE_PRIVATE = 1, MEMORY_DEVICE_FS_DAX, - MEMORY_DEVICE_DEVDAX, + MEMORY_DEVICE_GENERIC, MEMORY_DEVICE_PCI_P2PDMA, }; diff --git a/mm/memremap.c b/mm/memremap.c index 03e38b7a38f1..006dace60b1a 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -216,7 +216,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) return ERR_PTR(-EINVAL); } break; - case MEMORY_DEVICE_DEVDAX: + case MEMORY_DEVICE_GENERIC: need_devmap_managed = false; break; case MEMORY_DEVICE_PCI_P2PDMA: From 9e2369c06c8a181478039258a4598c1ddd2cadfa Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 1 Sep 2020 10:33:26 +0200 Subject: [PATCH 3/3] xen: add helpers to allocate unpopulated memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be used in order to create foreign mappings. This is based on the ZONE_DEVICE facility which is used by persistent memory devices in order to create struct pages and kernel virtual mappings for the IOMEM areas of such devices. Note that on kernels without support for ZONE_DEVICE Xen will fallback to use ballooned pages in order to create foreign mappings. The newly added helpers use the same parameters as the existing {alloc/free}_xenballooned_pages functions, which allows for in-place replacement of the callers. Once a memory region has been added to be used as scratch mapping space it will no longer be released, and pages returned are kept in a linked list. This allows to have a buffer of pages and prevents resorting to frequent additions and removals of regions. If enabled (because ZONE_DEVICE is supported) the usage of the new functionality untangles Xen balloon and RAM hotplug from the usage of unpopulated physical memory ranges to map foreign pages, which is the correct thing to do in order to avoid mappings of foreign pages depend on memory hotplug. Note the driver is currently not enabled on Arm platforms because it would interfere with the identity mapping required on some platforms. Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20200901083326.21264-4-roger.pau@citrix.com Signed-off-by: Juergen Gross --- drivers/gpu/drm/xen/xen_drm_front_gem.c | 9 +- drivers/xen/Kconfig | 10 ++ drivers/xen/Makefile | 1 + drivers/xen/balloon.c | 4 +- drivers/xen/grant-table.c | 4 +- drivers/xen/privcmd.c | 4 +- drivers/xen/unpopulated-alloc.c | 183 ++++++++++++++++++++++++ drivers/xen/xenbus/xenbus_client.c | 6 +- drivers/xen/xlate_mmu.c | 4 +- include/xen/xen.h | 9 ++ 10 files changed, 219 insertions(+), 15 deletions(-) create mode 100644 drivers/xen/unpopulated-alloc.c diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c index 39ff95b75357..534daf37c97e 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_gem.c +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c @@ -18,6 +18,7 @@ #include #include +#include #include "xen_drm_front.h" #include "xen_drm_front_gem.h" @@ -99,8 +100,8 @@ static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size) * allocate ballooned pages which will be used to map * grant references provided by the backend */ - ret = alloc_xenballooned_pages(xen_obj->num_pages, - xen_obj->pages); + ret = xen_alloc_unpopulated_pages(xen_obj->num_pages, + xen_obj->pages); if (ret < 0) { DRM_ERROR("Cannot allocate %zu ballooned pages: %d\n", xen_obj->num_pages, ret); @@ -152,8 +153,8 @@ void xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj) } else { if (xen_obj->pages) { if (xen_obj->be_alloc) { - free_xenballooned_pages(xen_obj->num_pages, - xen_obj->pages); + xen_free_unpopulated_pages(xen_obj->num_pages, + xen_obj->pages); gem_free_pages_array(xen_obj); } else { drm_gem_put_pages(&xen_obj->base, diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 46e7fd099904..0ab54df82520 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -324,4 +324,14 @@ config XEN_HAVE_VPMU config XEN_FRONT_PGDIR_SHBUF tristate +config XEN_UNPOPULATED_ALLOC + bool "Use unpopulated memory ranges for guest mappings" + depends on X86 && ZONE_DEVICE + default XEN_BACKEND || XEN_GNTDEV || XEN_DOM0 + help + Use unpopulated memory ranges in order to create mappings for guest + memory regions, including grant maps and foreign pages. This avoids + having to balloon out RAM regions in order to obtain physical memory + space to create such mappings. + endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 0d322f3d90cd..3cca2be28824 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -42,3 +42,4 @@ xen-gntdev-$(CONFIG_XEN_GNTDEV_DMABUF) += gntdev-dmabuf.o xen-gntalloc-y := gntalloc.o xen-privcmd-y := privcmd.o privcmd-buf.o obj-$(CONFIG_XEN_FRONT_PGDIR_SHBUF) += xen-front-pgdir-shbuf.o +obj-$(CONFIG_XEN_UNPOPULATED_ALLOC) += unpopulated-alloc.o diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index b1d8b028bf80..4bfbe71705e4 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -654,7 +654,7 @@ void free_xenballooned_pages(int nr_pages, struct page **pages) } EXPORT_SYMBOL(free_xenballooned_pages); -#ifdef CONFIG_XEN_PV +#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC) static void __init balloon_add_region(unsigned long start_pfn, unsigned long pages) { @@ -708,7 +708,7 @@ static int __init balloon_init(void) register_sysctl_table(xen_root); #endif -#ifdef CONFIG_XEN_PV +#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC) { int i; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 8d06bf1cc347..523dcdf39cc9 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -801,7 +801,7 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages) { int ret; - ret = alloc_xenballooned_pages(nr_pages, pages); + ret = xen_alloc_unpopulated_pages(nr_pages, pages); if (ret < 0) return ret; @@ -836,7 +836,7 @@ EXPORT_SYMBOL_GPL(gnttab_pages_clear_private); void gnttab_free_pages(int nr_pages, struct page **pages) { gnttab_pages_clear_private(nr_pages, pages); - free_xenballooned_pages(nr_pages, pages); + xen_free_unpopulated_pages(nr_pages, pages); } EXPORT_SYMBOL_GPL(gnttab_free_pages); diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 095d683ad574..8bcb0ce223a5 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -425,7 +425,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) if (pages == NULL) return -ENOMEM; - rc = alloc_xenballooned_pages(numpgs, pages); + rc = xen_alloc_unpopulated_pages(numpgs, pages); if (rc != 0) { pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, numpgs, rc); @@ -896,7 +896,7 @@ static void privcmd_close(struct vm_area_struct *vma) rc = xen_unmap_domain_gfn_range(vma, numgfns, pages); if (rc == 0) - free_xenballooned_pages(numpgs, pages); + xen_free_unpopulated_pages(numpgs, pages); else pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n", numpgs, rc); diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c new file mode 100644 index 000000000000..3b98dc921426 --- /dev/null +++ b/drivers/xen/unpopulated-alloc.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +static DEFINE_MUTEX(list_lock); +static LIST_HEAD(page_list); +static unsigned int list_count; + +static int fill_list(unsigned int nr_pages) +{ + struct dev_pagemap *pgmap; + void *vaddr; + unsigned int i, alloc_pages = round_up(nr_pages, PAGES_PER_SECTION); + int ret; + + pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL); + if (!pgmap) + return -ENOMEM; + + pgmap->type = MEMORY_DEVICE_GENERIC; + pgmap->res.name = "Xen scratch"; + pgmap->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + ret = allocate_resource(&iomem_resource, &pgmap->res, + alloc_pages * PAGE_SIZE, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + if (ret < 0) { + pr_err("Cannot allocate new IOMEM resource\n"); + kfree(pgmap); + return ret; + } + +#ifdef CONFIG_XEN_HAVE_PVMMU + /* + * memremap will build page tables for the new memory so + * the p2m must contain invalid entries so the correct + * non-present PTEs will be written. + * + * If a failure occurs, the original (identity) p2m entries + * are not restored since this region is now known not to + * conflict with any devices. + */ + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + xen_pfn_t pfn = PFN_DOWN(pgmap->res.start); + + for (i = 0; i < alloc_pages; i++) { + if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) { + pr_warn("set_phys_to_machine() failed, no memory added\n"); + release_resource(&pgmap->res); + kfree(pgmap); + return -ENOMEM; + } + } + } +#endif + + vaddr = memremap_pages(pgmap, NUMA_NO_NODE); + if (IS_ERR(vaddr)) { + pr_err("Cannot remap memory range\n"); + release_resource(&pgmap->res); + kfree(pgmap); + return PTR_ERR(vaddr); + } + + for (i = 0; i < alloc_pages; i++) { + struct page *pg = virt_to_page(vaddr + PAGE_SIZE * i); + + BUG_ON(!virt_addr_valid(vaddr + PAGE_SIZE * i)); + list_add(&pg->lru, &page_list); + list_count++; + } + + return 0; +} + +/** + * xen_alloc_unpopulated_pages - alloc unpopulated pages + * @nr_pages: Number of pages + * @pages: pages returned + * @return 0 on success, error otherwise + */ +int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages) +{ + unsigned int i; + int ret = 0; + + mutex_lock(&list_lock); + if (list_count < nr_pages) { + ret = fill_list(nr_pages - list_count); + if (ret) + goto out; + } + + for (i = 0; i < nr_pages; i++) { + struct page *pg = list_first_entry_or_null(&page_list, + struct page, + lru); + + BUG_ON(!pg); + list_del(&pg->lru); + list_count--; + pages[i] = pg; + +#ifdef CONFIG_XEN_HAVE_PVMMU + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + ret = xen_alloc_p2m_entry(page_to_pfn(pg)); + if (ret < 0) { + unsigned int j; + + for (j = 0; j <= i; j++) { + list_add(&pages[j]->lru, &page_list); + list_count++; + } + goto out; + } + } +#endif + } + +out: + mutex_unlock(&list_lock); + return ret; +} +EXPORT_SYMBOL(xen_alloc_unpopulated_pages); + +/** + * xen_free_unpopulated_pages - return unpopulated pages + * @nr_pages: Number of pages + * @pages: pages to return + */ +void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages) +{ + unsigned int i; + + mutex_lock(&list_lock); + for (i = 0; i < nr_pages; i++) { + list_add(&pages[i]->lru, &page_list); + list_count++; + } + mutex_unlock(&list_lock); +} +EXPORT_SYMBOL(xen_free_unpopulated_pages); + +#ifdef CONFIG_XEN_PV +static int __init init(void) +{ + unsigned int i; + + if (!xen_domain()) + return -ENODEV; + + if (!xen_pv_domain()) + return 0; + + /* + * Initialize with pages from the extra memory regions (see + * arch/x86/xen/setup.c). + */ + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + unsigned int j; + + for (j = 0; j < xen_extra_mem[i].n_pfns; j++) { + struct page *pg = + pfn_to_page(xen_extra_mem[i].start_pfn + j); + + list_add(&pg->lru, &page_list); + list_count++; + } + } + + return 0; +} +subsys_initcall(init); +#endif diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 907bcbb93afb..2690318ad50f 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -621,7 +621,7 @@ static int xenbus_map_ring_hvm(struct xenbus_device *dev, bool leaked = false; unsigned int nr_pages = XENBUS_PAGES(nr_grefs); - err = alloc_xenballooned_pages(nr_pages, node->hvm.pages); + err = xen_alloc_unpopulated_pages(nr_pages, node->hvm.pages); if (err) goto out_err; @@ -662,7 +662,7 @@ static int xenbus_map_ring_hvm(struct xenbus_device *dev, addr, nr_pages); out_free_ballooned_pages: if (!leaked) - free_xenballooned_pages(nr_pages, node->hvm.pages); + xen_free_unpopulated_pages(nr_pages, node->hvm.pages); out_err: return err; } @@ -858,7 +858,7 @@ static int xenbus_unmap_ring_hvm(struct xenbus_device *dev, void *vaddr) info.addrs); if (!rv) { vunmap(vaddr); - free_xenballooned_pages(nr_pages, node->hvm.pages); + xen_free_unpopulated_pages(nr_pages, node->hvm.pages); } else WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages); diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index 7b1077f0abcb..34742c6e189e 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -232,7 +232,7 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt, kfree(pages); return -ENOMEM; } - rc = alloc_xenballooned_pages(nr_pages, pages); + rc = xen_alloc_unpopulated_pages(nr_pages, pages); if (rc) { pr_warn("%s Couldn't balloon alloc %ld pages rc:%d\n", __func__, nr_pages, rc); @@ -249,7 +249,7 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt, if (!vaddr) { pr_warn("%s Couldn't map %ld pages rc:%d\n", __func__, nr_pages, rc); - free_xenballooned_pages(nr_pages, pages); + xen_free_unpopulated_pages(nr_pages, pages); kfree(pages); kfree(pfns); return -ENOMEM; diff --git a/include/xen/xen.h b/include/xen/xen.h index 19a72f591e2b..43efba045acc 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -52,4 +52,13 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, extern u64 xen_saved_max_mem_size; #endif +#ifdef CONFIG_XEN_UNPOPULATED_ALLOC +int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages); +void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages); +#else +#define xen_alloc_unpopulated_pages alloc_xenballooned_pages +#define xen_free_unpopulated_pages free_xenballooned_pages +#include +#endif + #endif /* _XEN_XEN_H */