alistair23-linux/drivers/dax/kmem.c
David Hildenbrand 60858c00e5 device-dax: don't leak kernel memory to user space after unloading kmem
Assume we have kmem configured and loaded:

  [root@localhost ~]# cat /proc/iomem
  ...
  140000000-33fffffff : Persistent Memory
    140000000-1481fffff : namespace0.0
    150000000-33fffffff : dax0.0
      150000000-33fffffff : System RAM

Assume we try to unload kmem. This force-unloading will work, even if
memory cannot get removed from the system.

  [root@localhost ~]# rmmod kmem
  [   86.380228] removing memory fails, because memory [0x0000000150000000-0x0000000157ffffff] is onlined
  ...
  [   86.431225] kmem dax0.0: DAX region [mem 0x150000000-0x33fffffff] cannot be hotremoved until the next reboot

Now, we can reconfigure the namespace:

  [root@localhost ~]# ndctl create-namespace --force --reconfig=namespace0.0 --mode=devdax
  [  131.409351] nd_pmem namespace0.0: could not reserve region [mem 0x140000000-0x33fffffff]
  [  131.410147] nd_pmem: probe of namespace0.0 failed with error -16
  ...

This fails as expected due to the busy memory resource, and the memory
cannot be used.  However, the dax0.0 device is removed, and along with
it its name.

The name of the memory resource now points at freed memory (name of the
device):

  [root@localhost ~]# cat /proc/iomem
  ...
  140000000-33fffffff : Persistent Memory
    140000000-1481fffff : namespace0.0
    150000000-33fffffff : �_�^7_��/_��wR��WQ���^��� ...
    150000000-33fffffff : System RAM

We have to make sure to duplicate the string.  While at it, remove the
superfluous setting of the name and fixup a stale comment.

Fixes: 9f960da72b ("device-dax: "Hotremove" persistent memory that is used like normal RAM")
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: <stable@vger.kernel.org>	[5.3]
Link: http://lkml.kernel.org/r/20200508084217.9160-2-david@redhat.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-05-23 10:26:31 -07:00

153 lines
4.1 KiB
C

// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-2019 Intel Corporation. All rights reserved. */
#include <linux/memremap.h>
#include <linux/pagemap.h>
#include <linux/memory.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/pfn_t.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include "dax-private.h"
#include "bus.h"
int dev_dax_kmem_probe(struct device *dev)
{
struct dev_dax *dev_dax = to_dev_dax(dev);
struct resource *res = &dev_dax->region->res;
resource_size_t kmem_start;
resource_size_t kmem_size;
resource_size_t kmem_end;
struct resource *new_res;
const char *new_res_name;
int numa_node;
int rc;
/*
* Ensure good NUMA information for the persistent memory.
* Without this check, there is a risk that slow memory
* could be mixed in a node with faster memory, causing
* unavoidable performance issues.
*/
numa_node = dev_dax->target_node;
if (numa_node < 0) {
dev_warn(dev, "rejecting DAX region %pR with invalid node: %d\n",
res, numa_node);
return -EINVAL;
}
/* Hotplug starting at the beginning of the next block: */
kmem_start = ALIGN(res->start, memory_block_size_bytes());
kmem_size = resource_size(res);
/* Adjust the size down to compensate for moving up kmem_start: */
kmem_size -= kmem_start - res->start;
/* Align the size down to cover only complete blocks: */
kmem_size &= ~(memory_block_size_bytes() - 1);
kmem_end = kmem_start + kmem_size;
new_res_name = kstrdup(dev_name(dev), GFP_KERNEL);
if (!new_res_name)
return -ENOMEM;
/* Region is permanently reserved if hotremove fails. */
new_res = request_mem_region(kmem_start, kmem_size, new_res_name);
if (!new_res) {
dev_warn(dev, "could not reserve region [%pa-%pa]\n",
&kmem_start, &kmem_end);
kfree(new_res_name);
return -EBUSY;
}
/*
* Set flags appropriate for System RAM. Leave ..._BUSY clear
* so that add_memory() can add a child resource. Do not
* inherit flags from the parent since it may set new flags
* unknown to us that will break add_memory() below.
*/
new_res->flags = IORESOURCE_SYSTEM_RAM;
rc = add_memory(numa_node, new_res->start, resource_size(new_res));
if (rc) {
release_resource(new_res);
kfree(new_res);
kfree(new_res_name);
return rc;
}
dev_dax->dax_kmem_res = new_res;
return 0;
}
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
 * dev_dax_kmem_remove() - hot-remove the memory recorded in
 * dev_dax->dax_kmem_res and, only if that succeeds, release the reserved
 * resource together with its name string.
 * @dev: device being unbound; converted to a dev_dax with to_dev_dax().
 *
 * Return: 0 on success, otherwise the error from remove_memory(). In the
 * error case the resource and its name are deliberately left in place.
 */
static int dev_dax_kmem_remove(struct device *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct resource *kmem_res = dev_dax->dax_kmem_res;
	/* Remember the name now: kmem_res is freed before the name is. */
	const char *name = kmem_res->name;
	resource_size_t start = kmem_res->start;
	resource_size_t len = resource_size(kmem_res);
	int rc;

	/*
	 * There is exactly one attempt at removing the memory. If any memory
	 * block was not offlined before this call, remove_memory() fails and
	 * the range cannot be hotremoved until reboot, because the device
	 * unbind goes ahead even when a failure is returned here.
	 */
	rc = remove_memory(dev_dax->target_node, start, len);
	if (!rc) {
		/* Memory is gone: release and free the dax resources. */
		release_resource(kmem_res);
		kfree(kmem_res);
		kfree(name);
		dev_dax->dax_kmem_res = NULL;
		return 0;
	}

	dev_err(dev,
		"DAX region %pR cannot be hotremoved until the next reboot\n",
		kmem_res);
	return rc;
}
#else
/*
 * dev_dax_kmem_remove() - remove handler built when CONFIG_MEMORY_HOTREMOVE
 * is not set. It releases nothing and always returns 0; @dev is unused.
 */
static int dev_dax_kmem_remove(struct device *dev)
{
/*
* Without hotremove purposely leak the request_mem_region() for the
* device-dax range and return '0' to ->remove() attempts. The removal
* of the device from the driver always succeeds, but the region is
* permanently pinned as reserved by the unreleased
* request_mem_region().
*/
return 0;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
/*
 * Driver descriptor handed to dax_driver_register(): wires the embedded
 * driver's probe and remove callbacks to the kmem handlers in this file.
 */
static struct dax_device_driver device_dax_kmem_driver = {
	.drv.probe = dev_dax_kmem_probe,
	.drv.remove = dev_dax_kmem_remove,
};
/*
 * Module init hook: registers device_dax_kmem_driver and returns whatever
 * dax_driver_register() reports.
 */
static int __init dax_kmem_init(void)
{
	int rc;

	rc = dax_driver_register(&device_dax_kmem_driver);

	return rc;
}
/*
 * Module exit hook: unregisters device_dax_kmem_driver through
 * dax_driver_unregister(), mirroring the registration in dax_kmem_init().
 */
static void __exit dax_kmem_exit(void)
{
dax_driver_unregister(&device_dax_kmem_driver);
}
/* Module metadata: author and license strings. */
MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
/* Name dax_kmem_init()/dax_kmem_exit() as the module's init and exit hooks. */
module_init(dax_kmem_init);
module_exit(dax_kmem_exit);
/*
 * NOTE(review): presumably declares a module alias for DAX device type 0;
 * the macro is defined outside this file (likely bus.h) - confirm there.
 */
MODULE_ALIAS_DAX_DEVICE(0);