From 3c5c98d135f50c516631806ce77e50e7ce33bde8 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Wed, 19 Sep 2018 18:28:48 +0530 Subject: [PATCH 01/21] libnvdimm: remove duplicate include Removed duplicate include. Signed-off-by: Pankaj Gupta Signed-off-by: Dan Williams --- drivers/nvdimm/nd-core.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index ac68072fb8cd..182258f64417 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -14,7 +14,6 @@ #define __ND_CORE_H__ #include #include -#include #include #include #include From b6eae0f61db27748606cc00dafcfd1e2c032f0a5 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 25 Sep 2018 13:53:02 -0700 Subject: [PATCH 02/21] libnvdimm: Hold reference on parent while scheduling async init Unlike asynchronous initialization in the core we have not yet associated the device with the parent, and as such the device doesn't hold a reference to the parent. In order to resolve that we should be holding a reference on the parent until the asynchronous initialization has completed. Cc: Fixes: 4d88a97aa9e8 ("libnvdimm: ...base ... infrastructure") Signed-off-by: Alexander Duyck Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 8aae6dcc839f..9148015ed803 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -488,6 +488,8 @@ static void nd_async_device_register(void *d, async_cookie_t cookie) put_device(dev); } put_device(dev); + if (dev->parent) + put_device(dev->parent); } static void nd_async_device_unregister(void *d, async_cookie_t cookie) @@ -507,6 +509,8 @@ void __nd_device_register(struct device *dev) if (!dev) return; dev->bus = &nvdimm_bus_type; + if (dev->parent) + get_device(dev->parent); get_device(dev); async_schedule_domain(nd_async_device_register, dev, &nd_async_domain); From 1a091d16dbff6d373c5335e08316756e6c39f048 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 25 Sep 2018 13:53:07 -0700 Subject: [PATCH 03/21] libnvdimm: Set device node in nd_device_register This change makes it so that we don't repeatedly overwrite the device node for nvdimm regions. The earliest we can set the node is immediately after calling device init, so I have moved the code there so we can avoid rewriting the node with each uevent. Signed-off-by: Alexander Duyck Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 9148015ed803..f1fb39921236 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -54,12 +54,6 @@ static int to_nd_device_type(struct device *dev) static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env) { - /* - * Ensure that region devices always have their numa node set as - * early as possible. - */ - if (is_nd_region(dev)) - set_dev_node(dev, to_nd_region(dev)->numa_node); return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT, to_nd_device_type(dev)); } @@ -508,6 +502,16 @@ void __nd_device_register(struct device *dev) { if (!dev) return; + + /* + * Ensure that region devices always have their NUMA node set as + * early as possible. This way we are able to make certain that + * any memory associated with the creation and the creation + * itself of the region is associated with the correct node. + */ + if (is_nd_region(dev)) + set_dev_node(dev, to_nd_region(dev)->numa_node); + dev->bus = &nvdimm_bus_type; if (dev->parent) get_device(dev->parent); From 48af2f7e52f4cd5eb41ebcd1bd2e326aa2d33d32 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Tue, 18 Sep 2018 17:48:31 -0600 Subject: [PATCH 04/21] libnvdimm, pfn: during init, clear errors in the metadata area If there are badblocks present in the 'struct page' area for pfn namespaces, until now, the only way to clear them has been to force the namespace into raw mode, clear the errors, and re-enable the fsdax mode. This is clunky, given that it should be easy enough for the pfn driver to do the same. Add a new helper that uses the most recently available badblocks list to check whether there are any badblocks that lie in the volatile struct page area. If so, before initializing the struct pages, send down targeted writes via nvdimm_write_bytes to write zeroes to the affected blocks, and thus clear errors. Cc: Dan Williams Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 61 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 3f7ad5bc443e..24c64090169e 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -361,6 +361,65 @@ struct device *nd_pfn_create(struct nd_region *nd_region) return dev; } +/* + * nd_pfn_clear_memmap_errors() clears any errors in the volatile memmap + * space associated with the namespace. If the memmap is set to DRAM, then + * this is a no-op. Since the memmap area is freshly initialized during + * probe, we have an opportunity to clear any badblocks in this area. + */ +static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) +{ + struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); + struct nd_namespace_common *ndns = nd_pfn->ndns; + void *zero_page = page_address(ZERO_PAGE(0)); + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + int num_bad, meta_num, rc, bb_present; + sector_t first_bad, meta_start; + struct nd_namespace_io *nsio; + + if (nd_pfn->mode != PFN_MODE_PMEM) + return 0; + + nsio = to_nd_namespace_io(&ndns->dev); + meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9; + meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start; + + do { + unsigned long zero_len; + u64 nsoff; + + bb_present = badblocks_check(&nd_region->bb, meta_start, + meta_num, &first_bad, &num_bad); + if (bb_present) { + dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %lx\n", + num_bad, first_bad); + nsoff = ALIGN_DOWN((nd_region->ndr_start + + (first_bad << 9)) - nsio->res.start, + PAGE_SIZE); + zero_len = ALIGN(num_bad << 9, PAGE_SIZE); + while (zero_len) { + unsigned long chunk = min(zero_len, PAGE_SIZE); + + rc = nvdimm_write_bytes(ndns, nsoff, zero_page, + chunk, 0); + if (rc) + break; + + zero_len -= chunk; + nsoff += chunk; + } + if (rc) { + dev_err(&nd_pfn->dev, + "error clearing %x badblocks at %lx\n", + num_bad, first_bad); + return rc; + } + } + } while (bb_present); + + return 0; +} + int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { u64 checksum, offset; @@ -477,7 +536,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -ENXIO; } - return 0; + return nd_pfn_clear_memmap_errors(nd_pfn); } EXPORT_SYMBOL(nd_pfn_validate); From 5d394eee2c102453278d81d9a7cf94c80253486a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 27 Sep 2018 15:01:55 -0700 Subject: [PATCH 05/21] libnvdimm, region: Fail badblocks listing for inactive regions While experimenting with region driver loading the following backtrace was triggered: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. [..] Call Trace: dump_stack+0x85/0xcb register_lock_class+0x571/0x580 ? __lock_acquire+0x2ba/0x1310 ? kernfs_seq_start+0x2a/0x80 __lock_acquire+0xd4/0x1310 ? dev_attr_show+0x1c/0x50 ? __lock_acquire+0x2ba/0x1310 ? kernfs_seq_start+0x2a/0x80 ? lock_acquire+0x9e/0x1a0 lock_acquire+0x9e/0x1a0 ? dev_attr_show+0x1c/0x50 badblocks_show+0x70/0x190 ? dev_attr_show+0x1c/0x50 dev_attr_show+0x1c/0x50 This results from a missing successful call to devm_init_badblocks() from nd_region_probe(). Block attempts to show badblocks while the region is not enabled. Fixes: 6a6bef90425e ("libnvdimm: add mechanism to publish badblocks...") Cc: Reviewed-by: Johannes Thumshirn Reviewed-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/nvdimm/region_devs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index fa37afcd43ff..174a418cb171 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -560,10 +560,17 @@ static ssize_t region_badblocks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; - return badblocks_show(&nd_region->bb, buf, 0); + device_lock(dev); + if (dev->driver) + rc = badblocks_show(&nd_region->bb, buf, 0); + else + rc = -ENXIO; + device_unlock(dev); + + return rc; } - static DEVICE_ATTR(badblocks, 0444, region_badblocks_show, NULL); static ssize_t resource_show(struct device *dev, From 55781b66936ee4e15cdd6d591b26662ab9d2d847 Mon Sep 17 00:00:00 2001 From: GuangZhe Fu Date: Mon, 1 Oct 2018 23:35:00 -0400 Subject: [PATCH 06/21] libnvdimm, namespace: Drop the repeat assignment for variable dev->parent The variable dev-parent is assigned twice with the same &nd_region->dev. I think we could drop the second one. Signed-off-by: GuangZhe Fu Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 4a4266250c28..681af3a8fd62 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2099,7 +2099,6 @@ static struct device *nd_namespace_pmem_create(struct nd_region *nd_region) return NULL; } dev_set_name(dev, "namespace%d.%d", nd_region->id, nspm->id); - dev->parent = &nd_region->dev; dev->groups = nd_namespace_attribute_groups; nd_namespace_pmem_set_resource(nd_region, nspm, 0); From 91ed7ac444ef749603a95629a5ec483988c4f14b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 4 Oct 2018 16:32:08 -0700 Subject: [PATCH 07/21] libnvdimm, pmem: Fix badblocks population for 'raw' namespaces The driver is only initializing bb_res in the devm_memremap_pages() paths, but the raw namespace case is passing an uninitialized bb_res to nvdimm_badblocks_populate(). Fixes: e8d513483300 ("memremap: change devm_memremap_pages interface...") Cc: Cc: Christoph Hellwig Reported-by: Jacek Zloch Reported-by: Krzysztof Rusocki Reviewed-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 6071e2942053..2082ae01b9c8 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -421,9 +421,11 @@ static int pmem_attach_disk(struct device *dev, addr = devm_memremap_pages(dev, &pmem->pgmap); pmem->pfn_flags |= PFN_MAP; memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); - } else + } else { addr = devm_memremap(dev, pmem->phys_addr, pmem->size, ARCH_MEMREMAP_PMEM); + memcpy(&bb_res, &nsio->res, sizeof(bb_res)); + } /* * At release time the queue must be frozen before From d11cf4a7321b538563b0ab30dc0d1f18f9c56226 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 10 Oct 2018 16:38:24 -0700 Subject: [PATCH 08/21] libnvdimm, dimm: Maximize label transfer size Use kvzalloc() to bypass the arbitrary PAGE_SIZE limit of label transfer operations. Given the expense of calling into firmware, maximize the amount of label data we transfer per call to be up to the total label space if allowed by the firmware. Instead of limiting based on PAGE_SIZE we can instead simply limit the maximum size based on either the config_size int he case of the get operation, or the length of the write based on the set operation. On a system with 24 NVDIMM modules each with a config_size of 128K and a maximum transfer size of 64K - 4, this patch reduces the init time for the label data from around 24 seconds down to between 4-5 seconds. Reviewed-by: Toshi Kani Signed-off-by: Alexander Duyck Signed-off-by: Dan Williams --- drivers/nvdimm/dimm_devs.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 863cabc35215..75ac78017b15 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -111,8 +111,8 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) if (!ndd->data) return -ENOMEM; - max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer); - cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL); + max_cmd_size = min_t(u32, ndd->nsarea.config_size, ndd->nsarea.max_xfer); + cmd = kvzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; @@ -134,7 +134,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length); } dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc); - kfree(cmd); + kvfree(cmd); return rc; } @@ -157,9 +157,8 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, if (offset + len > ndd->nsarea.config_size) return -ENXIO; - max_cmd_size = min_t(u32, PAGE_SIZE, len); - max_cmd_size = min_t(u32, max_cmd_size, ndd->nsarea.max_xfer); - cmd = kzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL); + max_cmd_size = min_t(u32, len, ndd->nsarea.max_xfer); + cmd = kvzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL); if (!cmd) return -ENOMEM; @@ -183,7 +182,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, break; } } - kfree(cmd); + kvfree(cmd); return rc; } From d86d4d63d88861107d3bfc84be7294552231ecd0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 10 Oct 2018 16:38:41 -0700 Subject: [PATCH 09/21] nvdimm: Sanity check labeloff This patch adds validation for the labeloff field in the indexes. Reviewed-by: Toshi Kani Signed-off-by: Alexander Duyck Signed-off-by: Dan Williams --- drivers/nvdimm/label.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 1d28cd656536..1f5842509dbc 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -183,6 +183,13 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd) __le64_to_cpu(nsindex[i]->otheroff)); continue; } + if (__le64_to_cpu(nsindex[i]->labeloff) + != 2 * sizeof_namespace_index(ndd)) { + dev_dbg(dev, "nsindex%d labeloff: %#llx invalid\n", + i, (unsigned long long) + __le64_to_cpu(nsindex[i]->labeloff)); + continue; + } size = __le64_to_cpu(nsindex[i]->mysize); if (size > sizeof_namespace_index(ndd) From 1cfeb66e8e137be8e01b88bb4d416e987abda4a4 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 10 Oct 2018 16:38:55 -0700 Subject: [PATCH 10/21] nvdimm: Clarify comment in sizeof_namespace_index When working on the label code I found it rather confusing to see several spots that reference a minimum label size of 256 while working with labels that are 128 bytes in size. This patch is meant to provide a clarification on one of the comments that was at the heart of the issue. Specifically for version 1.2 and later of the namespace specification the minimum label size is 256, prior to that the minimum label size was 128. So we should state that as such to avoid confusion. Reviewed-by: Toshi Kani Signed-off-by: Alexander Duyck Signed-off-by: Dan Williams --- drivers/nvdimm/label.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 1f5842509dbc..bb813b8e8ace 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -75,7 +75,8 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) /* * Per UEFI 2.7, the minimum size of the Label Storage Area is large * enough to hold 2 index blocks and 2 labels. The minimum index - * block size is 256 bytes, and the minimum label size is 256 bytes. + * block size is 256 bytes. The label size is 128 for namespaces + * prior to version 1.2 and at minimum 256 for version 1.2 and later. */ nslot = nvdimm_num_label_slots(ndd); space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd); From 19418b024427ec60ba6084addf691a8d93670398 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 10 Oct 2018 16:39:06 -0700 Subject: [PATCH 11/21] nvdimm: Remove empty if statement This patch removes an empty statement from an if expression and promotes the else statement to the if expression with the expression logic reversed. I feel this is more readable as the empty statement can lead to issues if any additional logic was ever added. Reviewed-by: Toshi Kani Signed-off-by: Alexander Duyck Signed-off-by: Dan Williams --- drivers/nvdimm/label.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index bb813b8e8ace..43bad0d5bdb6 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -261,9 +261,8 @@ int nd_label_validate(struct nvdimm_drvdata *ndd) void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, struct nd_namespace_index *src) { - if (dst && src) - /* pass */; - else + /* just exit if either destination or source is NULL */ + if (!dst || !src) return; memcpy(dst, src, sizeof_namespace_index(ndd)); From 2d657d17f72d2ae70c02f0d0ea6a04ad0f016b57 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 10 Oct 2018 16:39:20 -0700 Subject: [PATCH 12/21] nvdimm: Split label init out from the logic for getting config data This patch splits the initialization of the label data into two functions. One for doing the init, and another for reading the actual configuration data. The idea behind this is that by doing this we create a symmetry between the getting and setting of config data in that we have a function for both. In addition it will make it easier for us to identify the bits that are related to init versus the pieces that are a wrapper for reading data from the ACPI interface. So for example by splitting things out like this it becomes much more obvious that we were performing checks that weren't necessarily related to the set/get operations such as relying on ndd->data being present when the set and get ops should not care about a locally cached copy of the label area. Reviewed-by: Toshi Kani Signed-off-by: Alexander Duyck Signed-off-by: Dan Williams --- drivers/nvdimm/dimm.c | 2 +- drivers/nvdimm/dimm_devs.c | 49 +++++++++++++++----------------------- drivers/nvdimm/label.c | 38 +++++++++++++++++++++++++++++ drivers/nvdimm/label.h | 1 + drivers/nvdimm/nd.h | 2 ++ 5 files changed, 61 insertions(+), 31 deletions(-) diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index 6c8fb7590838..07bf96948553 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c @@ -75,7 +75,7 @@ static int nvdimm_probe(struct device *dev) * DIMM capacity. We fail the dimm probe to prevent regions from * attempting to parse the label area. */ - rc = nvdimm_init_config_data(ndd); + rc = nd_label_data_init(ndd); if (rc == -EACCES) nvdimm_set_locked(dev); if (rc) diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 75ac78017b15..6c3de2317390 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -85,55 +85,47 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) return cmd_rc; } -int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) +int nvdimm_get_config_data(struct nvdimm_drvdata *ndd, void *buf, + size_t offset, size_t len) { struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; int rc = validate_dimm(ndd), cmd_rc = 0; struct nd_cmd_get_config_data_hdr *cmd; - struct nvdimm_bus_descriptor *nd_desc; - u32 max_cmd_size, config_size; - size_t offset; + size_t max_cmd_size, buf_offset; if (rc) return rc; - if (ndd->data) - return 0; - - if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0 - || ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) { - dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n", - ndd->nsarea.max_xfer, ndd->nsarea.config_size); + if (offset + len > ndd->nsarea.config_size) return -ENXIO; - } - ndd->data = kvmalloc(ndd->nsarea.config_size, GFP_KERNEL); - if (!ndd->data) - return -ENOMEM; - - max_cmd_size = min_t(u32, ndd->nsarea.config_size, ndd->nsarea.max_xfer); + max_cmd_size = min_t(u32, len, ndd->nsarea.max_xfer); cmd = kvzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; - nd_desc = nvdimm_bus->nd_desc; - for (config_size = ndd->nsarea.config_size, offset = 0; - config_size; config_size -= cmd->in_length, - offset += cmd->in_length) { - cmd->in_length = min(config_size, max_cmd_size); - cmd->in_offset = offset; + for (buf_offset = 0; len; + len -= cmd->in_length, buf_offset += cmd->in_length) { + size_t cmd_size; + + cmd->in_offset = offset + buf_offset; + cmd->in_length = min(max_cmd_size, len); + + cmd_size = sizeof(*cmd) + cmd->in_length; + rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_GET_CONFIG_DATA, cmd, - cmd->in_length + sizeof(*cmd), &cmd_rc); + ND_CMD_GET_CONFIG_DATA, cmd, cmd_size, &cmd_rc); if (rc < 0) break; if (cmd_rc < 0) { rc = cmd_rc; break; } - memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length); + + /* out_buf should be valid, copy it into our output buffer */ + memcpy(buf + buf_offset, cmd->out_buf, cmd->in_length); } - dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc); kvfree(cmd); return rc; @@ -151,9 +143,6 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, if (rc) return rc; - if (!ndd->data) - return -ENXIO; - if (offset + len > ndd->nsarea.config_size) return -ENXIO; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 43bad0d5bdb6..563f24af01b5 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -417,6 +417,44 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) return 0; } +int nd_label_data_init(struct nvdimm_drvdata *ndd) +{ + size_t config_size, read_size; + int rc = 0; + + if (ndd->data) + return 0; + + if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0) { + dev_dbg(ndd->dev, "failed to init config data area: (%u:%u)\n", + ndd->nsarea.max_xfer, ndd->nsarea.config_size); + return -ENXIO; + } + + /* + * We need to determine the maximum index area as this is the section + * we must read and validate before we can start processing labels. + * + * If the area is too small to contain the two indexes and 2 labels + * then we abort. + * + * Start at a label size of 128 as this should result in the largest + * possible namespace index size. + */ + ndd->nslabel_size = 128; + read_size = sizeof_namespace_index(ndd) * 2; + if (!read_size) + return -ENXIO; + + /* Allocate config data */ + config_size = ndd->nsarea.config_size; + ndd->data = kvzalloc(config_size, GFP_KERNEL); + if (!ndd->data) + return -ENOMEM; + + return nvdimm_get_config_data(ndd, ndd->data, 0, config_size); +} + int nd_label_active_count(struct nvdimm_drvdata *ndd) { struct nd_namespace_index *nsindex; diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h index 18bbe183b3a9..685afb3de0fe 100644 --- a/drivers/nvdimm/label.h +++ b/drivers/nvdimm/label.h @@ -141,6 +141,7 @@ struct nvdimm_drvdata; int nd_label_validate(struct nvdimm_drvdata *ndd); void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, struct nd_namespace_index *src); +int nd_label_data_init(struct nvdimm_drvdata *ndd); size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd); int nd_label_active_count(struct nvdimm_drvdata *ndd); struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 98317e7ce5b5..e79cc8e5c114 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -241,6 +241,8 @@ struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping); int nvdimm_check_config_data(struct device *dev); int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); +int nvdimm_get_config_data(struct nvdimm_drvdata *ndd, void *buf, + size_t offset, size_t len); int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len); long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, From 7d47aad4570e5e6e9a8162bb417ca9b74132f27c Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 10 Oct 2018 16:39:35 -0700 Subject: [PATCH 13/21] nvdimm: Use namespace index data to reduce number of label reads needed This patch adds logic that is meant to make use of the namespace index data to reduce the number of reads that are needed to initialize a given namespace. The general idea is that once we have enough data to validate the namespace index we do so and then proceed to fetch only those labels that are not listed as being "free". By doing this I am seeing a total time reduction from about 4-5 seconds to 2-3 seconds for 24 NVDIMM modules each with 128K of label config area. Reviewed-by: Toshi Kani Signed-off-by: Alexander Duyck Signed-off-by: Dan Williams --- drivers/nvdimm/dimm.c | 4 -- drivers/nvdimm/label.c | 93 +++++++++++++++++++++++++++++++++++++++--- drivers/nvdimm/label.h | 3 -- 3 files changed, 88 insertions(+), 12 deletions(-) diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index 07bf96948553..9899c97138a3 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c @@ -84,10 +84,6 @@ static int nvdimm_probe(struct device *dev) dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size); nvdimm_bus_lock(dev); - ndd->ns_current = nd_label_validate(ndd); - ndd->ns_next = nd_label_next_nsindex(ndd->ns_current); - nd_label_copy(ndd, to_next_namespace_index(ndd), - to_current_namespace_index(ndd)); if (ndd->ns_current >= 0) { rc = nd_label_reserve_dpa(ndd); if (rc == 0) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 563f24af01b5..7f03d117824f 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -235,7 +235,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd) return -1; } -int nd_label_validate(struct nvdimm_drvdata *ndd) +static int nd_label_validate(struct nvdimm_drvdata *ndd) { /* * In order to probe for and validate namespace index blocks we @@ -258,8 +258,9 @@ int nd_label_validate(struct nvdimm_drvdata *ndd) return -1; } -void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, - struct nd_namespace_index *src) +static void nd_label_copy(struct nvdimm_drvdata *ndd, + struct nd_namespace_index *dst, + struct nd_namespace_index *src) { /* just exit if either destination or source is NULL */ if (!dst || !src) @@ -419,7 +420,9 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) int nd_label_data_init(struct nvdimm_drvdata *ndd) { - size_t config_size, read_size; + size_t config_size, read_size, max_xfer, offset; + struct nd_namespace_index *nsindex; + unsigned int i; int rc = 0; if (ndd->data) @@ -452,7 +455,87 @@ int nd_label_data_init(struct nvdimm_drvdata *ndd) if (!ndd->data) return -ENOMEM; - return nvdimm_get_config_data(ndd, ndd->data, 0, config_size); + /* + * We want to guarantee as few reads as possible while conserving + * memory. To do that we figure out how much unused space will be left + * in the last read, divide that by the total number of reads it is + * going to take given our maximum transfer size, and then reduce our + * maximum transfer size based on that result. + */ + max_xfer = min_t(size_t, ndd->nsarea.max_xfer, config_size); + if (read_size < max_xfer) { + /* trim waste */ + max_xfer -= ((max_xfer - 1) - (config_size - 1) % max_xfer) / + DIV_ROUND_UP(config_size, max_xfer); + /* make certain we read indexes in exactly 1 read */ + if (max_xfer < read_size) + max_xfer = read_size; + } + + /* Make our initial read size a multiple of max_xfer size */ + read_size = min(DIV_ROUND_UP(read_size, max_xfer) * max_xfer, + config_size); + + /* Read the index data */ + rc = nvdimm_get_config_data(ndd, ndd->data, 0, read_size); + if (rc) + goto out_err; + + /* Validate index data, if not valid assume all labels are invalid */ + ndd->ns_current = nd_label_validate(ndd); + if (ndd->ns_current < 0) + return 0; + + /* Record our index values */ + ndd->ns_next = nd_label_next_nsindex(ndd->ns_current); + + /* Copy "current" index on top of the "next" index */ + nsindex = to_current_namespace_index(ndd); + nd_label_copy(ndd, to_next_namespace_index(ndd), nsindex); + + /* Determine starting offset for label data */ + offset = __le64_to_cpu(nsindex->labeloff); + + /* Loop through the free list pulling in any active labels */ + for (i = 0; i < nsindex->nslot; i++, offset += ndd->nslabel_size) { + size_t label_read_size; + + /* zero out the unused labels */ + if (test_bit_le(i, nsindex->free)) { + memset(ndd->data + offset, 0, ndd->nslabel_size); + continue; + } + + /* if we already read past here then just continue */ + if (offset + ndd->nslabel_size <= read_size) + continue; + + /* if we haven't read in a while reset our read_size offset */ + if (read_size < offset) + read_size = offset; + + /* determine how much more will be read after this next call. */ + label_read_size = offset + ndd->nslabel_size - read_size; + label_read_size = DIV_ROUND_UP(label_read_size, max_xfer) * + max_xfer; + + /* truncate last read if needed */ + if (read_size + label_read_size > config_size) + label_read_size = config_size - read_size; + + /* Read the label data */ + rc = nvdimm_get_config_data(ndd, ndd->data + read_size, + read_size, label_read_size); + if (rc) + goto out_err; + + /* push read_size to next read offset */ + read_size += label_read_size; + } + + dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc); +out_err: + return rc; } int nd_label_active_count(struct nvdimm_drvdata *ndd) diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h index 685afb3de0fe..e9a2ad3c2150 100644 --- a/drivers/nvdimm/label.h +++ b/drivers/nvdimm/label.h @@ -138,9 +138,6 @@ static inline int nd_label_next_nsindex(int index) } struct nvdimm_drvdata; -int nd_label_validate(struct nvdimm_drvdata *ndd); -void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, - struct nd_namespace_index *src); int nd_label_data_init(struct nvdimm_drvdata *ndd); size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd); int nd_label_active_count(struct nvdimm_drvdata *ndd); From 97052c1c31d5bcf08823ce1ea272447edd2d52de Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 11 Oct 2018 18:25:20 -0700 Subject: [PATCH 14/21] libnvdimm, label: Fix sparse warning The kbuild robot reports: drivers/nvdimm/label.c:500:32: warning: restricted __le32 degrades to integer ...read 'nslot' into a local u32. Reported-by: kbuild test robot Acked-by: Alexander Duyck Signed-off-by: Dan Williams --- drivers/nvdimm/label.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 7f03d117824f..750dbaa6ce82 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -424,6 +424,7 @@ int nd_label_data_init(struct nvdimm_drvdata *ndd) struct nd_namespace_index *nsindex; unsigned int i; int rc = 0; + u32 nslot; if (ndd->data) return 0; @@ -495,9 +496,10 @@ int nd_label_data_init(struct nvdimm_drvdata *ndd) /* Determine starting offset for label data */ offset = __le64_to_cpu(nsindex->labeloff); + nslot = __le32_to_cpu(nsindex->nslot); /* Loop through the free list pulling in any active labels */ - for (i = 0; i < nsindex->nslot; i++, offset += ndd->nslabel_size) { + for (i = 0; i < nslot; i++, offset += ndd->nslabel_size) { size_t label_read_size; /* zero out the unused labels */ From 6f07f86c494074a0755930473f67cc8916654221 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 26 Sep 2018 10:48:38 -0700 Subject: [PATCH 15/21] acpi, nfit: Introduce nfit_mem flags In preparation for adding a flag to indicate whether a DIMM publishes a dirty-shutdown count, convert the existing flags to a bit field. Reviewed-by: Keith Busch Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 27 ++++++++++++++++----------- drivers/acpi/nfit/nfit.h | 8 ++++++-- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index b072cfc5f20e..308a77b6c837 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -191,18 +191,20 @@ static int xlat_nvdimm_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd * In the _LSI, _LSR, _LSW case the locked status is * communicated via the read/write commands */ - if (nfit_mem->has_lsr) + if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) break; if (status >> 16 & ND_CONFIG_LOCKED) return -EACCES; break; case ND_CMD_GET_CONFIG_DATA: - if (nfit_mem->has_lsr && status == ACPI_LABELS_LOCKED) + if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags) + && status == ACPI_LABELS_LOCKED) return -EACCES; break; case ND_CMD_SET_CONFIG_DATA: - if (nfit_mem->has_lsw && status == ACPI_LABELS_LOCKED) + if (test_bit(NFIT_MEM_LSW, &nfit_mem->flags) + && status == ACPI_LABELS_LOCKED) return -EACCES; break; default: @@ -480,14 +482,16 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, min_t(u32, 256, in_buf.buffer.length), true); /* call the BIOS, prefer the named methods over _DSM if available */ - if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsr) + if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE + && test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) out_obj = acpi_label_info(handle); - else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && nfit_mem->has_lsr) { + else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA + && test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) { struct nd_cmd_get_config_data_hdr *p = buf; out_obj = acpi_label_read(handle, p->in_offset, p->in_length); } else if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA - && nfit_mem->has_lsw) { + && test_bit(NFIT_MEM_LSW, &nfit_mem->flags)) { struct nd_cmd_set_config_hdr *p = buf; out_obj = acpi_label_write(handle, p->in_offset, p->in_length, @@ -1784,12 +1788,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, if (acpi_nvdimm_has_method(adev_dimm, "_LSI") && acpi_nvdimm_has_method(adev_dimm, "_LSR")) { dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev)); - nfit_mem->has_lsr = true; + set_bit(NFIT_MEM_LSR, &nfit_mem->flags); } - if (nfit_mem->has_lsr && acpi_nvdimm_has_method(adev_dimm, "_LSW")) { + if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags) + && acpi_nvdimm_has_method(adev_dimm, "_LSW")) { dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev)); - nfit_mem->has_lsw = true; + set_bit(NFIT_MEM_LSW, &nfit_mem->flags); } return 0; @@ -1878,11 +1883,11 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK; } - if (nfit_mem->has_lsr) { + if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) { set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); } - if (nfit_mem->has_lsw) + if (test_bit(NFIT_MEM_LSW, &nfit_mem->flags)) set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask); flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index d1274ea2d251..111c3c437c80 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -159,6 +159,11 @@ struct nfit_memdev { struct acpi_nfit_memory_map memdev[0]; }; +enum nfit_mem_flags { + NFIT_MEM_LSR, + NFIT_MEM_LSW, +}; + /* assembled tables for a given dimm/memory-device */ struct nfit_mem { struct nvdimm *nvdimm; @@ -178,9 +183,8 @@ struct nfit_mem { struct acpi_nfit_desc *acpi_desc; struct resource *flush_wpq; unsigned long dsm_mask; + unsigned long flags; int family; - bool has_lsr; - bool has_lsw; }; struct acpi_nfit_desc { From 0ead11181fe0c9538b185e46a494df21dc7de23a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 26 Sep 2018 10:47:15 -0700 Subject: [PATCH 16/21] acpi, nfit: Collect shutdown status Some NVDIMMs, in addition to providing an indication of whether the previous shutdown was clean, also provide a running count of lifetime dirty-shutdown events for the device. In anticipation of this functionality appearing on more devices arrange for the nfit driver to retrieve / cache this data at DIMM discovery time, and export it via sysfs. Reviewed-by: Keith Busch Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 77 ++++++++++++++++++++++++++- drivers/acpi/nfit/intel.h | 38 +++++++++++++ drivers/acpi/nfit/nfit.h | 3 ++ tools/testing/nvdimm/test/nfit.c | 1 + tools/testing/nvdimm/test/nfit_test.h | 24 --------- 5 files changed, 118 insertions(+), 25 deletions(-) create mode 100644 drivers/acpi/nfit/intel.h diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 308a77b6c837..bf7021bb276c 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -25,6 +25,7 @@ #include #include #include "nfit.h" +#include "intel.h" /* * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is @@ -1551,7 +1552,12 @@ static DEVICE_ATTR_RO(dsm_mask); static ssize_t flags_show(struct device *dev, struct device_attribute *attr, char *buf) { - u16 flags = to_nfit_memdev(dev)->flags; + struct nvdimm *nvdimm = to_nvdimm(dev); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + u16 flags = __to_nfit_memdev(nfit_mem)->flags; + + if (test_bit(NFIT_MEM_DIRTY, &nfit_mem->flags)) + flags |= ACPI_NFIT_MEM_FLUSH_FAILED; return sprintf(buf, "%s%s%s%s%s%s%s\n", flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "", @@ -1582,6 +1588,16 @@ static ssize_t id_show(struct device *dev, } static DEVICE_ATTR_RO(id); +static ssize_t dirty_shutdown_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + + return sprintf(buf, "%d\n", nfit_mem->dirty_shutdown); +} +static DEVICE_ATTR_RO(dirty_shutdown); + static struct attribute *acpi_nfit_dimm_attributes[] = { &dev_attr_handle.attr, &dev_attr_phys_id.attr, @@ -1599,6 +1615,7 @@ static struct attribute *acpi_nfit_dimm_attributes[] = { &dev_attr_id.attr, &dev_attr_family.attr, &dev_attr_dsm_mask.attr, + &dev_attr_dirty_shutdown.attr, NULL, }; @@ -1607,6 +1624,7 @@ static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj, { struct device *dev = container_of(kobj, struct device, kobj); struct nvdimm *nvdimm = to_nvdimm(dev); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); if (!to_nfit_dcr(dev)) { /* Without a dcr only the memdev attributes can be surfaced */ @@ -1620,6 +1638,11 @@ static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj, if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1) return 0; + + if (!test_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags) + && a == &dev_attr_dirty_shutdown.attr) + return 0; + return a->mode; } @@ -1698,6 +1721,56 @@ static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method) return false; } +static void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem) +{ + struct nd_intel_smart smart = { 0 }; + union acpi_object in_buf = { + .type = ACPI_TYPE_BUFFER, + .buffer.pointer = (char *) &smart, + .buffer.length = sizeof(smart), + }; + union acpi_object in_obj = { + .type = ACPI_TYPE_PACKAGE, + .package.count = 1, + .package.elements = &in_buf, + }; + const u8 func = ND_INTEL_SMART; + const guid_t *guid = to_nfit_uuid(nfit_mem->family); + u8 revid = nfit_dsm_revid(nfit_mem->family, func); + struct acpi_device *adev = nfit_mem->adev; + acpi_handle handle = adev->handle; + union acpi_object *out_obj; + + if ((nfit_mem->dsm_mask & (1 << func)) == 0) + return; + + out_obj = acpi_evaluate_dsm(handle, guid, revid, func, &in_obj); + if (!out_obj) + return; + + if (smart.flags & ND_INTEL_SMART_SHUTDOWN_VALID) { + if (smart.shutdown_state) + set_bit(NFIT_MEM_DIRTY, &nfit_mem->flags); + } + + if (smart.flags & ND_INTEL_SMART_SHUTDOWN_COUNT_VALID) { + set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags); + nfit_mem->dirty_shutdown = smart.shutdown_count; + } + ACPI_FREE(out_obj); +} + +static void populate_shutdown_status(struct nfit_mem *nfit_mem) +{ + /* + * For DIMMs that provide a dynamic facility to retrieve a + * dirty-shutdown status and/or a dirty-shutdown count, cache + * these values in nfit_mem. + */ + if (nfit_mem->family == NVDIMM_FAMILY_INTEL) + nfit_intel_shutdown_status(nfit_mem); +} + static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, struct nfit_mem *nfit_mem, u32 device_handle) { @@ -1797,6 +1870,8 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, set_bit(NFIT_MEM_LSW, &nfit_mem->flags); } + populate_shutdown_status(nfit_mem); + return 0; } diff --git a/drivers/acpi/nfit/intel.h b/drivers/acpi/nfit/intel.h new file mode 100644 index 000000000000..86746312381f --- /dev/null +++ b/drivers/acpi/nfit/intel.h @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright(c) 2018 Intel Corporation. All rights reserved. + * Intel specific definitions for NVDIMM Firmware Interface Table - NFIT + */ +#ifndef _NFIT_INTEL_H_ +#define _NFIT_INTEL_H_ + +#define ND_INTEL_SMART 1 + +#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID (1 << 5) +#define ND_INTEL_SMART_SHUTDOWN_VALID (1 << 10) + +struct nd_intel_smart { + u32 status; + union { + struct { + u32 flags; + u8 reserved0[4]; + u8 health; + u8 spares; + u8 life_used; + u8 alarm_flags; + u16 media_temperature; + u16 ctrl_temperature; + u32 shutdown_count; + u8 ait_status; + u16 pmic_temperature; + u8 reserved1[8]; + u8 shutdown_state; + u32 vendor_size; + u8 vendor_data[92]; + } __packed; + u8 data[128]; + }; +} __packed; + +#endif diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 111c3c437c80..af73aec547f2 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -162,6 +162,8 @@ struct nfit_memdev { enum nfit_mem_flags { NFIT_MEM_LSR, NFIT_MEM_LSW, + NFIT_MEM_DIRTY, + NFIT_MEM_DIRTY_COUNT, }; /* assembled tables for a given dimm/memory-device */ @@ -184,6 +186,7 @@ struct nfit_mem { struct resource *flush_wpq; unsigned long dsm_mask; unsigned long flags; + u32 dirty_shutdown; int family; }; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index cffc2c5a778d..f1a2a1a7bb1b 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include "nfit_test.h" diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 33752e06ff8d..ade14fe3837e 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -117,30 +117,6 @@ struct nd_cmd_ars_err_inj_stat { #define ND_INTEL_SMART_INJECT_FATAL (1 << 2) #define ND_INTEL_SMART_INJECT_SHUTDOWN (1 << 3) -struct nd_intel_smart { - __u32 status; - union { - struct { - __u32 flags; - __u8 reserved0[4]; - __u8 health; - __u8 spares; - __u8 life_used; - __u8 alarm_flags; - __u16 media_temperature; - __u16 ctrl_temperature; - __u32 shutdown_count; - __u8 ait_status; - __u16 pmic_temperature; - __u8 reserved1[8]; - __u8 shutdown_state; - __u32 vendor_size; - __u8 vendor_data[92]; - } __packed; - __u8 data[128]; - }; -} __packed; - struct nd_intel_smart_threshold { __u32 status; union { From f110176633d74bbac1f80ab9b9c6b83ea3e1cc23 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 17 Oct 2018 10:47:19 -0700 Subject: [PATCH 17/21] tools/testing/nvdimm: Populate dirty shutdown data Allow the unit tests to verify the retrieval of the dirty shutdown count via smart commands, and allow the driver-load-time retrieval of the smart health payload to be simulated by nfit_test. Reviewed-by: Keith Busch Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 7 +++++-- tools/testing/nvdimm/Kbuild | 1 + tools/testing/nvdimm/acpi_nfit_test.c | 8 ++++++++ tools/testing/nvdimm/test/nfit.c | 3 ++- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index bf7021bb276c..ec8fb578fa36 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1721,7 +1721,7 @@ static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method) return false; } -static void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem) +__weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem) { struct nd_intel_smart smart = { 0 }; union acpi_object in_buf = { @@ -1785,8 +1785,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en; nfit_mem->family = NVDIMM_FAMILY_INTEL; adev = to_acpi_dev(acpi_desc); - if (!adev) + if (!adev) { + /* unit test case */ + populate_shutdown_status(nfit_mem); return 0; + } adev_dimm = acpi_find_child_device(adev, device_handle, false); nfit_mem->adev = adev_dimm; diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 0392153a0009..778ceb651000 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -22,6 +22,7 @@ NVDIMM_SRC := $(DRIVERS)/nvdimm ACPI_SRC := $(DRIVERS)/acpi/nfit DAX_SRC := $(DRIVERS)/dax ccflags-y := -I$(src)/$(NVDIMM_SRC)/ +ccflags-y += -I$(src)/$(ACPI_SRC)/ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c index 43521512e577..fec8fb1b7715 100644 --- a/tools/testing/nvdimm/acpi_nfit_test.c +++ b/tools/testing/nvdimm/acpi_nfit_test.c @@ -4,5 +4,13 @@ #include #include #include "watermark.h" +#include nfit_test_watermark(acpi_nfit); + +/* strong / override definition of nfit_intel_shutdown_status */ +void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem) +{ + set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags); + nfit_mem->dirty_shutdown = 42; +} diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index f1a2a1a7bb1b..9527d47a1070 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -149,6 +149,7 @@ static const struct nd_intel_smart smart_def = { | ND_INTEL_SMART_ALARM_VALID | ND_INTEL_SMART_USED_VALID | ND_INTEL_SMART_SHUTDOWN_VALID + | ND_INTEL_SMART_SHUTDOWN_COUNT_VALID | ND_INTEL_SMART_MTEMP_VALID | ND_INTEL_SMART_CTEMP_VALID, .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH, @@ -161,8 +162,8 @@ static const struct nd_intel_smart smart_def = { .ait_status = 1, .life_used = 5, .shutdown_state = 0, + .shutdown_count = 42, .vendor_size = 0, - .shutdown_count = 100, }; struct nfit_test_fw { From 9607871f37dc3e717639694b8d0dc738f2a68efc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Sep 2018 10:19:24 +0100 Subject: [PATCH 18/21] UAPI: ndctl: Fix g++-unsupported initialisation in headers The following code in the linux/ndctl header file: static inline const char *nvdimm_bus_cmd_name(unsigned cmd) { static const char * const names[] = { [ND_CMD_ARS_CAP] = "ars_cap", [ND_CMD_ARS_START] = "ars_start", [ND_CMD_ARS_STATUS] = "ars_status", [ND_CMD_CLEAR_ERROR] = "clear_error", [ND_CMD_CALL] = "cmd_call", }; if (cmd < ARRAY_SIZE(names) && names[cmd]) return names[cmd]; return "unknown"; } is broken in a number of ways: (1) ARRAY_SIZE() is not generally defined. (2) g++ does not support "non-trivial" array initialisers fully yet. (3) Every file that calls this function will acquire a copy of names[]. The same goes for nvdimm_cmd_name(). Fix all three by converting to a switch statement where each case returns a string. That way if cmd is a constant, the compiler can trivially reduce it and, if not, the compiler can use a shared lookup table if it thinks that is more efficient. A better way would be to remove these functions and their arrays from the header entirely. Signed-off-by: David Howells Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 48 +++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 7e27070b9440..2f2c43d633c5 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -128,37 +128,31 @@ enum { static inline const char *nvdimm_bus_cmd_name(unsigned cmd) { - static const char * const names[] = { - [ND_CMD_ARS_CAP] = "ars_cap", - [ND_CMD_ARS_START] = "ars_start", - [ND_CMD_ARS_STATUS] = "ars_status", - [ND_CMD_CLEAR_ERROR] = "clear_error", - [ND_CMD_CALL] = "cmd_call", - }; - - if (cmd < ARRAY_SIZE(names) && names[cmd]) - return names[cmd]; - return "unknown"; + switch (cmd) { + case ND_CMD_ARS_CAP: return "ars_cap"; + case ND_CMD_ARS_START: return "ars_start"; + case ND_CMD_ARS_STATUS: return "ars_status"; + case ND_CMD_CLEAR_ERROR: return "clear_error"; + case ND_CMD_CALL: return "cmd_call"; + default: return "unknown"; + } } static inline const char *nvdimm_cmd_name(unsigned cmd) { - static const char * const names[] = { - [ND_CMD_SMART] = "smart", - [ND_CMD_SMART_THRESHOLD] = "smart_thresh", - [ND_CMD_DIMM_FLAGS] = "flags", - [ND_CMD_GET_CONFIG_SIZE] = "get_size", - [ND_CMD_GET_CONFIG_DATA] = "get_data", - [ND_CMD_SET_CONFIG_DATA] = "set_data", - [ND_CMD_VENDOR_EFFECT_LOG_SIZE] = "effect_size", - [ND_CMD_VENDOR_EFFECT_LOG] = "effect_log", - [ND_CMD_VENDOR] = "vendor", - [ND_CMD_CALL] = "cmd_call", - }; - - if (cmd < ARRAY_SIZE(names) && names[cmd]) - return names[cmd]; - return "unknown"; + switch (cmd) { + case ND_CMD_SMART: return "smart"; + case ND_CMD_SMART_THRESHOLD: return "smart_thresh"; + case ND_CMD_DIMM_FLAGS: return "flags"; + case ND_CMD_GET_CONFIG_SIZE: return "get_size"; + case ND_CMD_GET_CONFIG_DATA: return "get_data"; + case ND_CMD_SET_CONFIG_DATA: return "set_data"; + case ND_CMD_VENDOR_EFFECT_LOG_SIZE: return "effect_size"; + case ND_CMD_VENDOR_EFFECT_LOG: return "effect_log"; + case ND_CMD_VENDOR: return "vendor"; + case ND_CMD_CALL: return "cmd_call"; + default: return "unknown"; + } } #define ND_IOCTL 'N' From f366d322aea782cf786aa821d5accdc1609f9e10 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Sep 2018 10:19:30 +0100 Subject: [PATCH 19/21] UAPI: ndctl: Remove use of PAGE_SIZE The macro PAGE_SIZE isn't valid outside of the kernel, so it should not appear in UAPI headers. Furthermore, the actual machine page size could theoretically change from an application's point of view if it's running in a container that gets migrated to another machine (say 4K/ppc64 to 64K/ppc64). Fixes: f2ba5a5baecf ("libnvdimm, namespace: make min namespace size 4K") Signed-off-by: David Howells Signed-off-by: Dan Williams --- include/linux/ndctl.h | 22 ++++++++++++++++++++++ include/uapi/linux/ndctl.h | 4 ---- 2 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 include/linux/ndctl.h diff --git a/include/linux/ndctl.h b/include/linux/ndctl.h new file mode 100644 index 000000000000..cd5a293ce3ae --- /dev/null +++ b/include/linux/ndctl.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2014-2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU Lesser General Public License, + * version 2.1, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + */ +#ifndef _LINUX_NDCTL_H +#define _LINUX_NDCTL_H + +#include + +enum { + ND_MIN_NAMESPACE_SIZE = PAGE_SIZE, +}; + +#endif /* _LINUX_NDCTL_H */ diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 2f2c43d633c5..f57c9e434d2d 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -202,10 +202,6 @@ enum nd_driver_flags { ND_DRIVER_DAX_PMEM = 1 << ND_DEVICE_DAX_PMEM, }; -enum { - ND_MIN_NAMESPACE_SIZE = PAGE_SIZE, -}; - enum ars_masks { ARS_STATUS_MASK = 0x0000FFFF, ARS_EXT_STATUS_SHIFT = 16, From d3abaf43bab8d5b0a3c6b982100d9e2be96de4ad Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 13 Oct 2018 20:32:17 -0700 Subject: [PATCH 20/21] acpi, nfit: Fix Address Range Scrub completion tracking The Address Range Scrub implementation tried to skip running scrubs against ranges that were already scrubbed by the BIOS. Unfortunately that support also resulted in early scrub completions as evidenced by this debug output from nfit_test: nd_region region9: ARS: range 1 short complete nd_region region3: ARS: range 1 short complete nd_region region4: ARS: range 2 ARS start (0) nd_region region4: ARS: range 2 short complete ...i.e. completions without any indications that the scrub was started. This state of affairs was hard to see in the code due to the proliferation of state bits and mistakenly trying to track done state per-range when the completion is a global property of the bus. So, kill the four ARS state bits (ARS_REQ, ARS_REQ_REDO, ARS_DONE, and ARS_SHORT), and replace them with just 2 request flags ARS_REQ_SHORT and ARS_REQ_LONG. The implementation will still complete and reap the results of BIOS initiated ARS, but it will not attempt to use that information to affect the completion status of scrubbing the ranges from a Linux perspective. Instead, try to synchronously run a short ARS per range at init time and schedule a long scrub in the background. If ARS is busy with an ARS request, schedule both a short and a long scrub for when ARS returns to idle. This logic also satisfies the intent of what ARS_REQ_REDO was trying to achieve. The new rule is that the REQ flag stays set until the next successful ars_start() for that range. With the new policy that the REQ flags are not cleared until the next start, the implementation no longer loses requests as can be seen from the following log: nd_region region3: ARS: range 1 ARS start short (0) nd_region region9: ARS: range 1 ARS start short (0) nd_region region3: ARS: range 1 complete nd_region region4: ARS: range 2 ARS start short (0) nd_region region9: ARS: range 1 complete nd_region region9: ARS: range 1 ARS start long (0) nd_region region4: ARS: range 2 complete nd_region region3: ARS: range 1 ARS start long (0) nd_region region9: ARS: range 1 complete nd_region region3: ARS: range 1 complete nd_region region4: ARS: range 2 ARS start long (0) nd_region region4: ARS: range 2 complete ...note that the nfit_test emulated driver provides 2 buses, that is why some of the range indices are duplicated. Notice that each range now successfully completes a short and long scrub. Cc: Fixes: 14c73f997a5e ("nfit, address-range-scrub: introduce nfit_spa->ars_state") Fixes: cc3d3458d46f ("acpi/nfit: queue issuing of ars when an uc error...") Reported-by: Jacek Zloch Reported-by: Krzysztof Rusocki Reviewed-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 167 ++++++++++++++++++++++----------------- drivers/acpi/nfit/nfit.h | 10 +-- 2 files changed, 100 insertions(+), 77 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index ec8fb578fa36..b24cd3e5b99f 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2549,7 +2549,8 @@ static int ars_get_cap(struct acpi_nfit_desc *acpi_desc, return cmd_rc; } -static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa) +static int ars_start(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa, enum nfit_ars_state req_type) { int rc; int cmd_rc; @@ -2560,7 +2561,7 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa memset(&ars_start, 0, sizeof(ars_start)); ars_start.address = spa->address; ars_start.length = spa->length; - if (test_bit(ARS_SHORT, &nfit_spa->ars_state)) + if (req_type == ARS_REQ_SHORT) ars_start.flags = ND_ARS_RETURN_PREV_DATA; if (nfit_spa_type(spa) == NFIT_SPA_PM) ars_start.type = ND_ARS_PERSISTENT; @@ -2617,6 +2618,15 @@ static void ars_complete(struct acpi_nfit_desc *acpi_desc, struct nd_region *nd_region = nfit_spa->nd_region; struct device *dev; + lockdep_assert_held(&acpi_desc->init_mutex); + /* + * Only advance the ARS state for ARS runs initiated by the + * kernel, ignore ARS results from BIOS initiated runs for scrub + * completion tracking. + */ + if (acpi_desc->scrub_spa != nfit_spa) + return; + if ((ars_status->address >= spa->address && ars_status->address < spa->address + spa->length) || (ars_status->address < spa->address)) { @@ -2636,28 +2646,13 @@ static void ars_complete(struct acpi_nfit_desc *acpi_desc, } else return; - if (test_bit(ARS_DONE, &nfit_spa->ars_state)) - return; - - if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state)) - return; - + acpi_desc->scrub_spa = NULL; if (nd_region) { dev = nd_region_dev(nd_region); nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON); } else dev = acpi_desc->dev; - - dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index, - test_bit(ARS_SHORT, &nfit_spa->ars_state) - ? "short" : "long"); - clear_bit(ARS_SHORT, &nfit_spa->ars_state); - if (test_and_clear_bit(ARS_REQ_REDO, &nfit_spa->ars_state)) { - set_bit(ARS_SHORT, &nfit_spa->ars_state); - set_bit(ARS_REQ, &nfit_spa->ars_state); - dev_dbg(dev, "ARS: processing scrub request received while in progress\n"); - } else - set_bit(ARS_DONE, &nfit_spa->ars_state); + dev_dbg(dev, "ARS: range %d complete\n", spa->range_index); } static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc) @@ -2938,46 +2933,55 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc) return 0; } -static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa, - int *query_rc) +static int ars_register(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) { - int rc = *query_rc; + int rc; - if (no_init_ars) + if (no_init_ars || test_bit(ARS_FAILED, &nfit_spa->ars_state)) return acpi_nfit_register_region(acpi_desc, nfit_spa); - set_bit(ARS_REQ, &nfit_spa->ars_state); - set_bit(ARS_SHORT, &nfit_spa->ars_state); + set_bit(ARS_REQ_SHORT, &nfit_spa->ars_state); + set_bit(ARS_REQ_LONG, &nfit_spa->ars_state); - switch (rc) { + switch (acpi_nfit_query_poison(acpi_desc)) { case 0: case -EAGAIN: - rc = ars_start(acpi_desc, nfit_spa); - if (rc == -EBUSY) { - *query_rc = rc; + rc = ars_start(acpi_desc, nfit_spa, ARS_REQ_SHORT); + /* shouldn't happen, try again later */ + if (rc == -EBUSY) break; - } else if (rc == 0) { - rc = acpi_nfit_query_poison(acpi_desc); - } else { + if (rc) { set_bit(ARS_FAILED, &nfit_spa->ars_state); break; } - if (rc == -EAGAIN) - clear_bit(ARS_SHORT, &nfit_spa->ars_state); - else if (rc == 0) - ars_complete(acpi_desc, nfit_spa); + clear_bit(ARS_REQ_SHORT, &nfit_spa->ars_state); + rc = acpi_nfit_query_poison(acpi_desc); + if (rc) + break; + acpi_desc->scrub_spa = nfit_spa; + ars_complete(acpi_desc, nfit_spa); + /* + * If ars_complete() says we didn't complete the + * short scrub, we'll try again with a long + * request. + */ + acpi_desc->scrub_spa = NULL; break; case -EBUSY: + case -ENOMEM: case -ENOSPC: + /* + * BIOS was using ARS, wait for it to complete (or + * resources to become available) and then perform our + * own scrubs. + */ break; default: set_bit(ARS_FAILED, &nfit_spa->ars_state); break; } - if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state)) - set_bit(ARS_REQ, &nfit_spa->ars_state); - return acpi_nfit_register_region(acpi_desc, nfit_spa); } @@ -2999,6 +3003,8 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc, struct device *dev = acpi_desc->dev; struct nfit_spa *nfit_spa; + lockdep_assert_held(&acpi_desc->init_mutex); + if (acpi_desc->cancel) return 0; @@ -3022,21 +3028,49 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc, ars_complete_all(acpi_desc); list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + enum nfit_ars_state req_type; + int rc; + if (test_bit(ARS_FAILED, &nfit_spa->ars_state)) continue; - if (test_bit(ARS_REQ, &nfit_spa->ars_state)) { - int rc = ars_start(acpi_desc, nfit_spa); - clear_bit(ARS_DONE, &nfit_spa->ars_state); - dev = nd_region_dev(nfit_spa->nd_region); - dev_dbg(dev, "ARS: range %d ARS start (%d)\n", - nfit_spa->spa->range_index, rc); - if (rc == 0 || rc == -EBUSY) - return 1; - dev_err(dev, "ARS: range %d ARS failed (%d)\n", - nfit_spa->spa->range_index, rc); - set_bit(ARS_FAILED, &nfit_spa->ars_state); + /* prefer short ARS requests first */ + if (test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state)) + req_type = ARS_REQ_SHORT; + else if (test_bit(ARS_REQ_LONG, &nfit_spa->ars_state)) + req_type = ARS_REQ_LONG; + else + continue; + rc = ars_start(acpi_desc, nfit_spa, req_type); + + dev = nd_region_dev(nfit_spa->nd_region); + dev_dbg(dev, "ARS: range %d ARS start %s (%d)\n", + nfit_spa->spa->range_index, + req_type == ARS_REQ_SHORT ? "short" : "long", + rc); + /* + * Hmm, we raced someone else starting ARS? Try again in + * a bit. + */ + if (rc == -EBUSY) + return 1; + if (rc == 0) { + dev_WARN_ONCE(dev, acpi_desc->scrub_spa, + "scrub start while range %d active\n", + acpi_desc->scrub_spa->spa->range_index); + clear_bit(req_type, &nfit_spa->ars_state); + acpi_desc->scrub_spa = nfit_spa; + /* + * Consider this spa last for future scrub + * requests + */ + list_move_tail(&nfit_spa->list, &acpi_desc->spas); + return 1; } + + dev_err(dev, "ARS: range %d ARS failed (%d)\n", + nfit_spa->spa->range_index, rc); + set_bit(ARS_FAILED, &nfit_spa->ars_state); } return 0; } @@ -3092,6 +3126,7 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc, struct nd_cmd_ars_cap ars_cap; int rc; + set_bit(ARS_FAILED, &nfit_spa->ars_state); memset(&ars_cap, 0, sizeof(ars_cap)); rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa); if (rc < 0) @@ -3108,16 +3143,14 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc, nfit_spa->clear_err_unit = ars_cap.clear_err_unit; acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars); clear_bit(ARS_FAILED, &nfit_spa->ars_state); - set_bit(ARS_REQ, &nfit_spa->ars_state); } static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) { struct nfit_spa *nfit_spa; - int rc, query_rc; + int rc; list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { - set_bit(ARS_FAILED, &nfit_spa->ars_state); switch (nfit_spa_type(nfit_spa->spa)) { case NFIT_SPA_VOLATILE: case NFIT_SPA_PM: @@ -3126,20 +3159,12 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) } } - /* - * Reap any results that might be pending before starting new - * short requests. - */ - query_rc = acpi_nfit_query_poison(acpi_desc); - if (query_rc == 0) - ars_complete_all(acpi_desc); - list_for_each_entry(nfit_spa, &acpi_desc->spas, list) switch (nfit_spa_type(nfit_spa->spa)) { case NFIT_SPA_VOLATILE: case NFIT_SPA_PM: /* register regions and kick off initial ARS run */ - rc = ars_register(acpi_desc, nfit_spa, &query_rc); + rc = ars_register(acpi_desc, nfit_spa); if (rc) return rc; break; @@ -3334,7 +3359,8 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, return 0; } -int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags) +int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, + enum nfit_ars_state req_type) { struct device *dev = acpi_desc->dev; int scheduled = 0, busy = 0; @@ -3354,14 +3380,10 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags) if (test_bit(ARS_FAILED, &nfit_spa->ars_state)) continue; - if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state)) { + if (test_and_set_bit(req_type, &nfit_spa->ars_state)) busy++; - set_bit(ARS_REQ_REDO, &nfit_spa->ars_state); - } else { - if (test_bit(ARS_SHORT, &flags)) - set_bit(ARS_SHORT, &nfit_spa->ars_state); + else scheduled++; - } } if (scheduled) { sched_ars(acpi_desc); @@ -3547,10 +3569,11 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle) static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle) { struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev); - unsigned long flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ? - 0 : 1 << ARS_SHORT; - acpi_nfit_ars_rescan(acpi_desc, flags); + if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) + acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG); + else + acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_SHORT); } void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index af73aec547f2..df0f6b8407e7 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -118,10 +118,8 @@ enum nfit_dimm_notifiers { }; enum nfit_ars_state { - ARS_REQ, - ARS_REQ_REDO, - ARS_DONE, - ARS_SHORT, + ARS_REQ_SHORT, + ARS_REQ_LONG, ARS_FAILED, }; @@ -205,6 +203,7 @@ struct acpi_nfit_desc { struct device *dev; u8 ars_start_flags; struct nd_cmd_ars_status *ars_status; + struct nfit_spa *scrub_spa; struct delayed_work dwork; struct list_head list; struct kernfs_node *scrub_count_state; @@ -259,7 +258,8 @@ struct nfit_blk { extern struct list_head acpi_descs; extern struct mutex acpi_desc_lock; -int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags); +int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, + enum nfit_ars_state req_type); #ifdef CONFIG_X86_MCE void nfit_mce_register(void); From 594861215c834e4b59a30d4b794f6372717bc197 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 15 Oct 2018 12:57:31 -0700 Subject: [PATCH 21/21] acpi, nfit: Further restrict userspace ARS start requests In addition to not allowing ARS start while the background thread is actively running, prevent ARS start while any scrub request is pending. This aligns the window for ARS start submission with the status of ARS reported via sysfs. Previously userspace could sneak its own ARS start requests in while sysfs reported -EBUSY. Reviewed-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index b24cd3e5b99f..f8c638f3c946 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -3341,6 +3341,8 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + struct nfit_spa *nfit_spa; + int rc = 0; if (nvdimm) return 0; @@ -3350,13 +3352,20 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, /* * The kernel and userspace may race to initiate a scrub, but * the scrub thread is prepared to lose that initial race. It - * just needs guarantees that any ars it initiates are not - * interrupted by any intervening start reqeusts from userspace. + * just needs guarantees that any ARS it initiates are not + * interrupted by any intervening start requests from userspace. */ - if (work_busy(&acpi_desc->dwork.work)) - return -EBUSY; + mutex_lock(&acpi_desc->init_mutex); + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) + if (acpi_desc->scrub_spa + || test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state) + || test_bit(ARS_REQ_LONG, &nfit_spa->ars_state)) { + rc = -EBUSY; + break; + } + mutex_unlock(&acpi_desc->init_mutex); - return 0; + return rc; } int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,