alistair23-linux/drivers/nvdimm/pmem.c
Dan Williams 5813882094 libnvdimm, nfit: handle unarmed dimms, mark namespaces read-only
Upon detection of an unarmed dimm in a region, arrange for descendant
BTT, PMEM, or BLK instances to be read-only.  A dimm is primarily marked
"unarmed" via flags passed by platform firmware (NFIT).

The flags in the NFIT memory device sub-structure indicate the state of
the data on the nvdimm relative to its energy source or last "flush to
persistence".  For the most part there is nothing the driver can do but
advertise the state of these flags in sysfs and emit a message if
firmware indicates that the contents of the device may be corrupted.
However, for the case of ACPI_NFIT_MEM_ARMED, the driver can arrange for
the block devices incorporating that nvdimm to be marked read-only.
This is a safe default as the data is still available and new writes are
held off until the administrator either forces read-write mode, or the
energy source becomes armed.

A 'read_only' attribute is added to REGION devices to allow for
overriding the default read-only policy of all descendant block devices.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2015-06-26 11:23:38 -04:00

295 lines
7 KiB
C

/*
* Persistent Memory Driver
*
* Copyright (c) 2014-2015, Intel Corporation.
* Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
* Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <asm/cacheflush.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/nd.h>
#include "nd.h"
struct pmem_device {
struct request_queue *pmem_queue;
struct gendisk *pmem_disk;
/* One contiguous memory region per device */
phys_addr_t phys_addr;
void *virt_addr;
size_t size;
};
static int pmem_major;
static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
unsigned int len, unsigned int off, int rw,
sector_t sector)
{
void *mem = kmap_atomic(page);
size_t pmem_off = sector << 9;
if (rw == READ) {
memcpy(mem + off, pmem->virt_addr + pmem_off, len);
flush_dcache_page(page);
} else {
flush_dcache_page(page);
memcpy(pmem->virt_addr + pmem_off, mem + off, len);
}
kunmap_atomic(mem);
}
static void pmem_make_request(struct request_queue *q, struct bio *bio)
{
bool do_acct;
unsigned long start;
struct bio_vec bvec;
struct bvec_iter iter;
struct block_device *bdev = bio->bi_bdev;
struct pmem_device *pmem = bdev->bd_disk->private_data;
do_acct = nd_iostat_start(bio, &start);
bio_for_each_segment(bvec, bio, iter)
pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset,
bio_data_dir(bio), iter.bi_sector);
if (do_acct)
nd_iostat_end(bio, start);
bio_endio(bio, 0);
}
static int pmem_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, int rw)
{
struct pmem_device *pmem = bdev->bd_disk->private_data;
pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
page_endio(page, rw & WRITE, 0);
return 0;
}
static long pmem_direct_access(struct block_device *bdev, sector_t sector,
void **kaddr, unsigned long *pfn, long size)
{
struct pmem_device *pmem = bdev->bd_disk->private_data;
size_t offset = sector << 9;
if (!pmem)
return -ENODEV;
*kaddr = pmem->virt_addr + offset;
*pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
return pmem->size - offset;
}
static const struct block_device_operations pmem_fops = {
.owner = THIS_MODULE,
.rw_page = pmem_rw_page,
.direct_access = pmem_direct_access,
.revalidate_disk = nvdimm_revalidate_disk,
};
static struct pmem_device *pmem_alloc(struct device *dev,
struct resource *res, int id)
{
struct pmem_device *pmem;
pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
if (!pmem)
return ERR_PTR(-ENOMEM);
pmem->phys_addr = res->start;
pmem->size = resource_size(res);
if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) {
dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
&pmem->phys_addr, pmem->size);
kfree(pmem);
return ERR_PTR(-EBUSY);
}
/*
* Map the memory as non-cachable, as we can't write back the contents
* of the CPU caches in case of a crash.
*/
pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
if (!pmem->virt_addr) {
release_mem_region(pmem->phys_addr, pmem->size);
kfree(pmem);
return ERR_PTR(-ENXIO);
}
return pmem;
}
static void pmem_detach_disk(struct pmem_device *pmem)
{
del_gendisk(pmem->pmem_disk);
put_disk(pmem->pmem_disk);
blk_cleanup_queue(pmem->pmem_queue);
}
static int pmem_attach_disk(struct nd_namespace_common *ndns,
struct pmem_device *pmem)
{
struct gendisk *disk;
pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL);
if (!pmem->pmem_queue)
return -ENOMEM;
blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
disk = alloc_disk(0);
if (!disk) {
blk_cleanup_queue(pmem->pmem_queue);
return -ENOMEM;
}
disk->major = pmem_major;
disk->first_minor = 0;
disk->fops = &pmem_fops;
disk->private_data = pmem;
disk->queue = pmem->pmem_queue;
disk->flags = GENHD_FL_EXT_DEVT;
nvdimm_namespace_disk_name(ndns, disk->disk_name);
disk->driverfs_dev = &ndns->dev;
set_capacity(disk, pmem->size >> 9);
pmem->pmem_disk = disk;
add_disk(disk);
revalidate_disk(disk);
return 0;
}
static int pmem_rw_bytes(struct nd_namespace_common *ndns,
resource_size_t offset, void *buf, size_t size, int rw)
{
struct pmem_device *pmem = dev_get_drvdata(ndns->claim);
if (unlikely(offset + size > pmem->size)) {
dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
return -EFAULT;
}
if (rw == READ)
memcpy(buf, pmem->virt_addr + offset, size);
else
memcpy(pmem->virt_addr + offset, buf, size);
return 0;
}
static void pmem_free(struct pmem_device *pmem)
{
iounmap(pmem->virt_addr);
release_mem_region(pmem->phys_addr, pmem->size);
kfree(pmem);
}
static int nd_pmem_probe(struct device *dev)
{
struct nd_region *nd_region = to_nd_region(dev->parent);
struct nd_namespace_common *ndns;
struct nd_namespace_io *nsio;
struct pmem_device *pmem;
int rc;
ndns = nvdimm_namespace_common_probe(dev);
if (IS_ERR(ndns))
return PTR_ERR(ndns);
nsio = to_nd_namespace_io(&ndns->dev);
pmem = pmem_alloc(dev, &nsio->res, nd_region->id);
if (IS_ERR(pmem))
return PTR_ERR(pmem);
dev_set_drvdata(dev, pmem);
ndns->rw_bytes = pmem_rw_bytes;
if (is_nd_btt(dev))
rc = nvdimm_namespace_attach_btt(ndns);
else if (nd_btt_probe(ndns, pmem) == 0) {
/* we'll come back as btt-pmem */
rc = -ENXIO;
} else
rc = pmem_attach_disk(ndns, pmem);
if (rc)
pmem_free(pmem);
return rc;
}
static int nd_pmem_remove(struct device *dev)
{
struct pmem_device *pmem = dev_get_drvdata(dev);
if (is_nd_btt(dev))
nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
else
pmem_detach_disk(pmem);
pmem_free(pmem);
return 0;
}
MODULE_ALIAS("pmem");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
static struct nd_device_driver nd_pmem_driver = {
.probe = nd_pmem_probe,
.remove = nd_pmem_remove,
.drv = {
.name = "nd_pmem",
},
.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
};
static int __init pmem_init(void)
{
int error;
pmem_major = register_blkdev(0, "pmem");
if (pmem_major < 0)
return pmem_major;
error = nd_driver_register(&nd_pmem_driver);
if (error) {
unregister_blkdev(pmem_major, "pmem");
return error;
}
return 0;
}
module_init(pmem_init);
static void pmem_exit(void)
{
driver_unregister(&nd_pmem_driver.drv);
unregister_blkdev(pmem_major, "pmem");
}
module_exit(pmem_exit);
MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
MODULE_LICENSE("GPL v2");