diff --git a/MAINTAINERS b/MAINTAINERS index b3ea3b687a25..fd6078443083 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6160,6 +6160,7 @@ L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported F: drivers/nvdimm/pmem.c +F: include/linux/pmem.h LINUX FOR IBM pSERIES (RS/6000) M: Paul Mackerras diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 2161fa178c8d..628a42c41ab1 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "nfit.h" @@ -305,6 +306,23 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc, return true; } +static bool add_flush(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_flush_address *flush) +{ /* add_flush: wrap one flush-address table in a devm-allocated nfit_flush and append it to acpi_desc->flushes; false is returned only when that allocation fails */ + struct device *dev = acpi_desc->dev; + struct nfit_flush *nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), + GFP_KERNEL); + + if (!nfit_flush) + return false; + INIT_LIST_HEAD(&nfit_flush->list); + nfit_flush->flush = flush; /* stores the caller's pointer; the table itself is not copied */ + list_add_tail(&nfit_flush->list, &acpi_desc->flushes); + dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__, + flush->device_handle, flush->hint_count); + return true; +} + static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, const void *end) { @@ -338,7 +356,8 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, return err; break; case ACPI_NFIT_TYPE_FLUSH_ADDRESS: - dev_dbg(dev, "%s: flush\n", __func__); + if (!add_flush(acpi_desc, table)) /* flush-address entries are now recorded instead of only being logged */ + return err; break; case ACPI_NFIT_TYPE_SMBIOS: dev_dbg(dev, "%s: smbios\n", __func__); @@ -389,6 +408,7 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, { u16 dcr = __to_nfit_memdev(nfit_mem)->region_index; struct nfit_memdev *nfit_memdev; + struct nfit_flush *nfit_flush; struct nfit_dcr *nfit_dcr; struct nfit_bdw *nfit_bdw; struct nfit_idt *nfit_idt; @@ -442,6 +462,14 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, nfit_mem->idt_bdw = nfit_idt->idt; break; } + + 
list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) { /* take the first recorded flush table whose device_handle equals this memdev's */ + if (nfit_flush->flush->device_handle != + nfit_memdev->memdev->device_handle) + continue; + nfit_mem->nfit_flush = nfit_flush; + break; + } break; } @@ -978,6 +1006,24 @@ static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio) return mmio->base_offset + line_offset + table_offset + sub_line_offset; } +static void wmb_blk(struct nfit_blk *nfit_blk) +{ /* wmb_blk: write barrier for block-window I/O; writes 1 to the mapped flush hint when nvdimm_flush is set, otherwise defers to wmb_pmem() */ + + if (nfit_blk->nvdimm_flush) { + /* + * The first wmb() is needed to 'sfence' all previous writes + * such that they are architecturally visible for the platform + * buffer flush. Note that we've already arranged for pmem + * writes to avoid the cache via arch_memcpy_to_pmem(). The + * final wmb() ensures ordering for the NVDIMM flush write. + */ + wmb(); + writeq(1, nfit_blk->nvdimm_flush); + wmb(); + } else + wmb_pmem(); +} + static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) { struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; @@ -1012,7 +1058,10 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, offset = to_interleave_offset(offset, mmio); writeq(cmd, mmio->base + offset); - /* FIXME: conditionally perform read-back if mandated by firmware */ + wmb_blk(nfit_blk); + + if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH) + readq(mmio->base + offset); /* reads back the address that was just written, only when ND_BLK_DCR_LATCH is set in dimm_flags */ } static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, @@ -1026,7 +1075,6 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES + lane * mmio->size; - /* TODO: non-temporal access, flush hints, cache management etc... 
*/ write_blk_ctl(nfit_blk, lane, dpa, len, rw); while (len) { unsigned int c; @@ -1045,13 +1093,19 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, } if (rw) - memcpy(mmio->aperture + offset, iobuf + copied, c); + memcpy_to_pmem(mmio->aperture + offset, + iobuf + copied, c); else - memcpy(iobuf + copied, mmio->aperture + offset, c); + memcpy_from_pmem(iobuf + copied, + mmio->aperture + offset, c); copied += c; len -= c; } + + if (rw) + wmb_blk(nfit_blk); + rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0; return rc; } @@ -1124,7 +1178,7 @@ static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc, } static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, - struct acpi_nfit_system_address *spa) + struct acpi_nfit_system_address *spa, enum spa_map_type type) { resource_size_t start = spa->address; resource_size_t n = spa->length; @@ -1152,8 +1206,15 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, if (!res) goto err_mem; - /* TODO: cacheability based on the spa type */ - spa_map->iomem = ioremap_nocache(start, n); + if (type == SPA_MAP_APERTURE) { + /* + * TODO: memremap_pmem() support, but that requires cache + * flushing when the aperture is moved. + */ + spa_map->iomem = ioremap_wc(start, n); + } else + spa_map->iomem = ioremap_nocache(start, n); + if (!spa_map->iomem) goto err_map; @@ -1171,6 +1232,7 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges * @nvdimm_bus: NFIT-bus that provided the spa table entry * @nfit_spa: spa table to map + * @type: aperture or control region * * In the case where block-data-window apertures and * dimm-control-regions are interleaved they will end up sharing a @@ -1180,12 +1242,12 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, * unbound. 
*/ static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc, - struct acpi_nfit_system_address *spa) + struct acpi_nfit_system_address *spa, enum spa_map_type type) { void __iomem *iomem; mutex_lock(&acpi_desc->spa_map_mutex); - iomem = __nfit_spa_map(acpi_desc, spa); + iomem = __nfit_spa_map(acpi_desc, spa, type); mutex_unlock(&acpi_desc->spa_map_mutex); return iomem; @@ -1206,12 +1268,35 @@ static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio, return 0; } +static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, struct nfit_blk *nfit_blk) +{ /* acpi_nfit_blk_get_flags: issue ND_CMD_DIMM_FLAGS via nd_desc->ndctl and store the answer in nfit_blk->dimm_flags. Returns the non-negative ndctl result when flags.status is 0, 0 after defaulting to ND_BLK_DCR_LATCH on -ENOTTY, and -ENXIO for anything else. */ + struct nd_cmd_dimm_flags flags; + int rc; + + memset(&flags, 0, sizeof(flags)); + rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags, + sizeof(flags)); + + if (rc >= 0 && flags.status == 0) + nfit_blk->dimm_flags = flags.flags; + else if (rc == -ENOTTY) { + /* fall back to a conservative default */ + nfit_blk->dimm_flags = ND_BLK_DCR_LATCH; + rc = 0; + } else + rc = -ENXIO; + + return rc; +} + static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, struct device *dev) { struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nfit_flush *nfit_flush; struct nfit_blk_mmio *mmio; struct nfit_blk *nfit_blk; struct nfit_mem *nfit_mem; @@ -1223,8 +1308,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) { dev_dbg(dev, "%s: missing%s%s%s\n", __func__, nfit_mem ? "" : " nfit_mem", - nfit_mem->dcr ? "" : " dcr", - nfit_mem->bdw ? "" : " bdw"); + (nfit_mem && nfit_mem->dcr) ? "" : " dcr", /* nfit_mem may be NULL on this branch, so test it before dereferencing */ + (nfit_mem && nfit_mem->bdw) ? 
"" : " bdw"); return -ENXIO; } @@ -1237,7 +1322,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, /* map block aperture memory */ nfit_blk->bdw_offset = nfit_mem->bdw->offset; mmio = &nfit_blk->mmio[BDW]; - mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw); + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw, + SPA_MAP_APERTURE); if (!mmio->base) { dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, nvdimm_name(nvdimm)); @@ -1259,7 +1345,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, nfit_blk->cmd_offset = nfit_mem->dcr->command_offset; nfit_blk->stat_offset = nfit_mem->dcr->status_offset; mmio = &nfit_blk->mmio[DCR]; - mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr); + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr, + SPA_MAP_CONTROL); if (!mmio->base) { dev_dbg(dev, "%s: %s failed to map dcr\n", __func__, nvdimm_name(nvdimm)); @@ -1277,6 +1364,24 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, return rc; } + rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk); + if (rc < 0) { + dev_dbg(dev, "%s: %s failed get DIMM flags\n", + __func__, nvdimm_name(nvdimm)); + return rc; + } + + nfit_flush = nfit_mem->nfit_flush; + if (nfit_flush && nfit_flush->flush->hint_count != 0) { + nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev, + nfit_flush->flush->hint_address[0], 8); + if (!nfit_blk->nvdimm_flush) + return -ENOMEM; + } + + if (!arch_has_pmem_api() && !nfit_blk->nvdimm_flush) + dev_warn(dev, "unable to guarantee persistence of writes\n"); + if (mmio->line_size == 0) return 0; @@ -1459,6 +1564,7 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) INIT_LIST_HEAD(&acpi_desc->dcrs); INIT_LIST_HEAD(&acpi_desc->bdws); INIT_LIST_HEAD(&acpi_desc->idts); + INIT_LIST_HEAD(&acpi_desc->flushes); INIT_LIST_HEAD(&acpi_desc->memdevs); INIT_LIST_HEAD(&acpi_desc->dimms); mutex_init(&acpi_desc->spa_map_mutex); diff --git a/drivers/acpi/nfit.h 
b/drivers/acpi/nfit.h index 81f2e8c5a79c..79b6d83875c1 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -40,6 +40,10 @@ enum nfit_uuids { NFIT_UUID_MAX, }; +enum { + ND_BLK_DCR_LATCH = 2, /* bit tested in nfit_blk.dimm_flags */ +}; + struct nfit_spa { struct acpi_nfit_system_address *spa; struct list_head list; @@ -60,6 +64,11 @@ struct nfit_idt { struct list_head list; }; +struct nfit_flush { /* one entry of acpi_nfit_desc.flushes */ + struct acpi_nfit_flush_address *flush; + struct list_head list; +}; + struct nfit_memdev { struct acpi_nfit_memory_map *memdev; struct list_head list; @@ -77,6 +86,7 @@ struct nfit_mem { struct acpi_nfit_system_address *spa_bdw; struct acpi_nfit_interleave *idt_dcr; struct acpi_nfit_interleave *idt_bdw; + struct nfit_flush *nfit_flush; /* flush table matched by device handle; NULL-checked by its user */ struct list_head list; struct acpi_device *adev; unsigned long dsm_mask; @@ -88,6 +98,7 @@ struct acpi_nfit_desc { struct mutex spa_map_mutex; struct list_head spa_maps; struct list_head memdevs; + struct list_head flushes; /* list of struct nfit_flush */ struct list_head dimms; struct list_head spas; struct list_head dcrs; @@ -109,7 +120,7 @@ struct nfit_blk { struct nfit_blk_mmio { union { void __iomem *base; - void *aperture; + void __pmem *aperture; /* accessed through memcpy_to_pmem()/memcpy_from_pmem() */ }; u64 size; u64 base_offset; @@ -123,6 +134,13 @@ struct nfit_blk { u64 bdw_offset; /* post interleave offset */ u64 stat_offset; u64 cmd_offset; + void __iomem *nvdimm_flush; /* mapping of the first flush hint address; tested for NULL before use */ + u32 dimm_flags; /* value reported for ND_CMD_DIMM_FLAGS, or the ND_BLK_DCR_LATCH default */ +}; + +enum spa_map_type { + SPA_MAP_CONTROL, /* __nfit_spa_map() uses ioremap_nocache() */ + SPA_MAP_APERTURE, /* __nfit_spa_map() uses ioremap_wc() */ }; struct nfit_spa_mapping { diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 8eb22c0ca7ce..7e2c43f701bc 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -535,8 +535,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, __func__, dimm_name, cmd_name, i); return -ENXIO; } - if (!access_ok(VERIFY_READ, p + in_len, in_size)) - return -EFAULT; if (in_len < sizeof(in_env)) copy = min_t(u32, sizeof(in_env) - in_len, in_size); else @@ -557,8 +555,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, __func__, dimm_name, 
cmd_name, i); return -EFAULT; } - if (!access_ok(VERIFY_WRITE, p + in_len + out_len, out_size)) - return -EFAULT; if (out_len < sizeof(out_env)) copy = min_t(u32, sizeof(out_env) - out_len, out_size); else @@ -570,9 +566,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, } buf_len = out_len + in_len; - if (!access_ok(VERIFY_WRITE, p, sizeof(buf_len))) - return -EFAULT; - if (buf_len > ND_IOCTL_MAX_BUFLEN) { dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__, dimm_name, cmd_name, buf_len, @@ -706,8 +699,10 @@ int __init nvdimm_bus_init(void) nvdimm_major = rc; nd_class = class_create(THIS_MODULE, "nd"); - if (IS_ERR(nd_class)) + if (IS_ERR(nd_class)) { + rc = PTR_ERR(nd_class); goto err_class; + } return 0; diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 7f8ad9593da7..e08a6ae7c0a4 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -17,11 +17,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? 
({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +# define __pmem __attribute__((noderef, address_space(5))) /* now defined whether or not CONFIG_SPARSE_RCU_POINTER is set */ #ifdef CONFIG_SPARSE_RCU_POINTER # define __rcu __attribute__((noderef, address_space(4))) #else # define __rcu -# define __pmem __attribute__((noderef, address_space(5))) #endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 8e9b64520ec1..f56914c7929b 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -1,3 +1,6 @@ +ldflags-y += --wrap=ioremap_wt +ldflags-y += --wrap=ioremap_wc +ldflags-y += --wrap=devm_ioremap_nocache ldflags-y += --wrap=ioremap_cache ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index c85a6f6ba559..64bfaa50831c 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -65,6 +65,21 @@ void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, return fallback_fn(offset, size); } +void __iomem *__wrap_devm_ioremap_nocache(struct device *dev, + resource_size_t offset, unsigned long size) +{ /* __wrap_devm_ioremap_nocache: offsets that get_nfit_res() recognises are translated into the test buffer; all others are passed to devm_ioremap_nocache() */ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); /* NOTE(review): nfit_res is dereferenced after the RCU read section ends; confirm its lifetime is guaranteed elsewhere */ + if (nfit_res) + return (void __iomem *) nfit_res->buf + offset + - nfit_res->res->start; + return devm_ioremap_nocache(dev, offset, size); +} +EXPORT_SYMBOL(__wrap_devm_ioremap_nocache); + void __iomem *__wrap_ioremap_cache(resource_size_t offset, unsigned long size) { return __nfit_test_ioremap(offset, size, ioremap_cache); @@ -77,6 +92,18 @@ void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) } EXPORT_SYMBOL(__wrap_ioremap_nocache); +void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size) +{ /* delegates to __nfit_test_ioremap() with ioremap_wt as the fallback */ + return __nfit_test_ioremap(offset, size, 
ioremap_wt); +} +EXPORT_SYMBOL(__wrap_ioremap_wt); + +void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size) +{ /* delegates to __nfit_test_ioremap() with ioremap_wc as the fallback */ + return __nfit_test_ioremap(offset, size, ioremap_wc); +} +EXPORT_SYMBOL(__wrap_ioremap_wc); + void __wrap_iounmap(volatile void __iomem *addr) { struct nfit_test_resource *nfit_res; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 4b69b8368de0..d0bdae40ccc9 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -128,6 +128,8 @@ struct nfit_test { int num_pm; void **dimm; dma_addr_t *dimm_dma; + void **flush; /* per-index buffers from test_alloc(), 8 bytes each */ + dma_addr_t *flush_dma; /* matching addresses, published as hint_address[0] */ void **label; dma_addr_t *label_dma; void **spa_set; @@ -155,7 +157,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, int i, rc; if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) - return -ENXIO; + return -ENOTTY; /* NOTE(review): presumably chosen so acpi_nfit_blk_get_flags() takes its -ENOTTY default path; confirm */ /* lookup label space for the given dimm */ for (i = 0; i < ARRAY_SIZE(handle); i++) @@ -331,7 +333,8 @@ static int nfit_test0_alloc(struct nfit_test *t) + sizeof(struct acpi_nfit_system_address) * NUM_SPA + sizeof(struct acpi_nfit_memory_map) * NUM_MEM + sizeof(struct acpi_nfit_control_region) * NUM_DCR - + sizeof(struct acpi_nfit_data_region) * NUM_BDW; + + sizeof(struct acpi_nfit_data_region) * NUM_BDW + + sizeof(struct acpi_nfit_flush_address) * NUM_DCR; /* space for the flush-address tables appended after the data regions */ int i; t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); @@ -356,6 +359,10 @@ static int nfit_test0_alloc(struct nfit_test *t) if (!t->label[i]) return -ENOMEM; sprintf(t->label[i], "label%d", i); + + t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]); /* 8 bytes, the size acpi_nfit_blk_region_enable() maps for a hint */ + if (!t->flush[i]) + return -ENOMEM; } for (i = 0; i < NUM_DCR; i++) { @@ -408,6 +415,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_system_address *spa; struct acpi_nfit_control_region *dcr; struct acpi_nfit_data_region *bdw; + struct acpi_nfit_flush_address *flush; unsigned int offset; nfit_test_init_header(nfit_buf, size); @@ -831,6 +839,39 @@ static void 
nfit_test0_setup(struct nfit_test *t) bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset = offset + sizeof(struct acpi_nfit_data_region) * 4; + /* flush0 (dimm0) */ + flush = nfit_buf + offset; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[0]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[0]; + + /* flush1 (dimm1) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[1]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[1]; + + /* flush2 (dimm2) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[2]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[2]; + + /* flush3 (dimm3) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[3]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[3]; + acpi_desc = &t->acpi_desc; set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); @@ -933,6 +974,10 @@ static int nfit_test_probe(struct platform_device *pdev) GFP_KERNEL); nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), GFP_KERNEL); + nfit_test->flush = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->flush_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), + GFP_KERNEL); nfit_test->label = devm_kcalloc(dev, num, sizeof(void *), GFP_KERNEL); nfit_test->label_dma = devm_kcalloc(dev, num, @@ 
-943,7 +988,8 @@ static int nfit_test_probe(struct platform_device *pdev) sizeof(dma_addr_t), GFP_KERNEL); if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label && nfit_test->label_dma && nfit_test->dcr - && nfit_test->dcr_dma) + && nfit_test->dcr_dma && nfit_test->flush + && nfit_test->flush_dma) /* pass */; else return -ENOMEM;