From 6f9c104b183a5a4115a3999ddda6049cbfdbfbbc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 10 Jan 2019 17:38:55 +0200 Subject: [PATCH 01/29] APEI / ERST: Switch to use new generic UUID API There are new types and helpers that are supposed to be used in new code. As a preparation to get rid of legacy types and API functions do the conversion here. Signed-off-by: Andy Shevchenko Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/erst.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 9953e50667ec..389d88e35ffb 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -938,17 +938,17 @@ static struct pstore_info erst_info = { }; #define CPER_CREATOR_PSTORE \ - UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ - 0x64, 0x90, 0xb8, 0x9d) + GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ + 0x64, 0x90, 0xb8, 0x9d) #define CPER_SECTION_TYPE_DMESG \ - UUID_LE(0xc197e04e, 0xd545, 0x4a70, 0x9c, 0x17, 0xa5, 0x54, \ - 0x94, 0x19, 0xeb, 0x12) + GUID_INIT(0xc197e04e, 0xd545, 0x4a70, 0x9c, 0x17, 0xa5, 0x54, \ + 0x94, 0x19, 0xeb, 0x12) #define CPER_SECTION_TYPE_DMESG_Z \ - UUID_LE(0x4f118707, 0x04dd, 0x4055, 0xb5, 0xdd, 0x95, 0x6d, \ - 0x34, 0xdd, 0xfa, 0xc6) + GUID_INIT(0x4f118707, 0x04dd, 0x4055, 0xb5, 0xdd, 0x95, 0x6d, \ + 0x34, 0xdd, 0xfa, 0xc6) #define CPER_SECTION_TYPE_MCE \ - UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ - 0x04, 0x4a, 0x38, 0xfc) + GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ + 0x04, 0x4a, 0x38, 0xfc) struct cper_pstore_record { struct cper_record_header hdr; @@ -1012,7 +1012,7 @@ skip: rc = -EIO; goto out; } - if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0) + if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE)) goto skip; record->buf = kmalloc(len, GFP_KERNEL); @@ -1024,15 +1024,12 @@ skip: record->id = record_id; record->compressed = false; record->ecc_notice_size = 0; - if (uuid_le_cmp(rcd->sec_hdr.section_type, - CPER_SECTION_TYPE_DMESG_Z) == 0) { + if (guid_equal(&rcd->sec_hdr.section_type, &CPER_SECTION_TYPE_DMESG_Z)) { record->type = PSTORE_TYPE_DMESG; record->compressed = true; - } else if (uuid_le_cmp(rcd->sec_hdr.section_type, - CPER_SECTION_TYPE_DMESG) == 0) + } else if (guid_equal(&rcd->sec_hdr.section_type, &CPER_SECTION_TYPE_DMESG)) record->type = PSTORE_TYPE_DMESG; - else if (uuid_le_cmp(rcd->sec_hdr.section_type, - CPER_SECTION_TYPE_MCE) == 0) + else if (guid_equal(&rcd->sec_hdr.section_type, &CPER_SECTION_TYPE_MCE)) record->type = PSTORE_TYPE_MCE; else record->type = PSTORE_TYPE_MAX; From f3355298fc5a24eb7606448bc02a08b3485e5979 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 12 Nov 2018 19:00:55 +0000 Subject: [PATCH 02/29] ACPI / APEI: Fix parsing HEST that includes Deferred Machine Check subtable ACPI 6.2 includes a new definition for a Deferred Machine Check "DMC" subtable. The definition of this subtable was included in following commit: c042933df2b1 (ACPICA: Add support for new HEST subtable) However, the HEST parsing function was not updated to include this new subtable. Therefore, Linux will fail to parse the HEST on systems that include a DMC entry. Add the length check for the new DMC subtable so that HEST parsing doesn't fail on systems that include it. Signed-off-by: Yazen Ghannam Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/hest.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index b1e9f81ebeea..0935a0ef37e5 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -53,6 +53,7 @@ static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = { [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge), [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic), [ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2), + [ACPI_HEST_TYPE_IA32_DEFERRED_CHECK] = -1, }; static int hest_esrc_len(struct acpi_hest_header *hest_hdr) @@ -75,6 +76,11 @@ static int hest_esrc_len(struct acpi_hest_header *hest_hdr) mc = (struct acpi_hest_ia_machine_check *)hest_hdr; len = sizeof(*mc) + mc->num_hardware_banks * sizeof(struct acpi_hest_ia_error_bank); + } else if (hest_type == ACPI_HEST_TYPE_IA32_DEFERRED_CHECK) { + struct acpi_hest_ia_deferred_check *mc; + mc = (struct acpi_hest_ia_deferred_check *)hest_hdr; + len = sizeof(*mc) + mc->num_hardware_banks * + sizeof(struct acpi_hest_ia_error_bank); } BUG_ON(len == -1); From ee9fa8f302dc1df10583fc043a91003e0a332d1b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 2 Jan 2019 13:18:08 +0000 Subject: [PATCH 03/29] ACPI: APEI: EINJ: Use DEFINE_DEBUGFS_ATTRIBUTE for debugfs files Use DEFINE_DEBUGFS_ATTRIBUTE rather than DEFINE_SIMPLE_ATTRIBUTE for debugfs files to make debugfs_simple_attr.cocci warnings go away. Semantic patch information: Rationale: DEFINE_SIMPLE_ATTRIBUTE + debugfs_create_file() imposes some significant overhead as compared to DEFINE_DEBUGFS_ATTRIBUTE + debugfs_create_file_unsafe(). Generated by: scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci Signed-off-by: YueHaibing Acked-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/einj.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index fcccbfdbdd1a..6119da1aea4e 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -644,8 +644,8 @@ static int error_type_set(void *data, u64 val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(error_type_fops, error_type_get, - error_type_set, "0x%llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(error_type_fops, error_type_get, error_type_set, + "0x%llx\n"); static int error_inject_set(void *data, u64 val) { @@ -656,8 +656,7 @@ static int error_inject_set(void *data, u64 val) error_param3, error_param4); } -DEFINE_SIMPLE_ATTRIBUTE(error_inject_fops, NULL, - error_inject_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(error_inject_fops, NULL, error_inject_set, "%llu\n"); static int einj_check_table(struct acpi_table_einj *einj_tab) { @@ -718,12 +717,14 @@ static int __init einj_init(void) if (!fentry) goto err_cleanup; - fentry = debugfs_create_file("error_type", S_IRUSR | S_IWUSR, - einj_debug_dir, NULL, &error_type_fops); + fentry = debugfs_create_file_unsafe("error_type", 0600, + einj_debug_dir, NULL, + &error_type_fops); if (!fentry) goto err_cleanup; - fentry = debugfs_create_file("error_inject", S_IWUSR, - einj_debug_dir, NULL, &error_inject_fops); + fentry = debugfs_create_file_unsafe("error_inject", 0200, + einj_debug_dir, NULL, + &error_inject_fops); if (!fentry) goto err_cleanup; From 78b0b690f6558ed788dccafa45965325dd11ba89 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:37 +0000 Subject: [PATCH 04/29] ACPI / APEI: Don't wait to serialise with oops messages when panic()ing oops_begin() exists to group printk() messages with the oops message printed by die(). To reach this caller we know that platform firmware took this error first, then notified the OS via NMI with a 'panic' severity. Don't wait for another CPU to release the die-lock before panic()ing, our only goal is to print this fatal error and panic(). This code is always called in_nmi(), and since commit 42a0bb3f7138 ("printk/nmi: generic solution for safe printk in NMI"), it has been safe to call printk() from this context. Messages are batched in a per-cpu buffer and printed via irq-work, or a call back from panic(). Link: https://patchwork.kernel.org/patch/10313555/ Acked-by: Borislav Petkov Signed-off-by: James Morse Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index f008ba7c9ced..0c46b79e31b1 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -949,7 +948,6 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) sev = ghes_severity(ghes->estatus->error_severity); if (sev >= GHES_SEV_PANIC) { - oops_begin(); ghes_print_queued_estatus(); __ghes_panic(ghes); } From 93066e9aefa16beb10bb4a32c2f1657822b57753 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:38 +0000 Subject: [PATCH 05/29] ACPI / APEI: Remove silent flag from ghes_read_estatus() Subsequent patches will split up ghes_read_estatus(), at which point passing around the 'silent' flag gets annoying. This is to suppress prink() messages, which prior to commit 42a0bb3f7138 ("printk/nmi: generic solution for safe printk in NMI"), were unsafe in NMI context. This is no longer necessary, remove the flag. printk() messages are batched in a per-cpu buffer and printed via irq-work, or a call back from panic(). Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 0c46b79e31b1..f0a704aed040 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -324,7 +324,7 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, } } -static int ghes_read_estatus(struct ghes *ghes, int silent) +static int ghes_read_estatus(struct ghes *ghes) { struct acpi_hest_generic *g = ghes->generic; u64 buf_paddr; @@ -333,8 +333,7 @@ static int ghes_read_estatus(struct ghes *ghes, int silent) rc = apei_read(&buf_paddr, &g->error_status_address); if (rc) { - if (!silent && printk_ratelimit()) - pr_warning(FW_WARN GHES_PFX + pr_warn_ratelimited(FW_WARN GHES_PFX "Failed to read error status block address for hardware error source: %d.\n", g->header.source_id); return -EIO; @@ -366,9 +365,9 @@ static int ghes_read_estatus(struct ghes *ghes, int silent) rc = 0; err_read_block: - if (rc && !silent && printk_ratelimit()) - pr_warning(FW_WARN GHES_PFX - "Failed to read error status block!\n"); + if (rc) + pr_warn_ratelimited(FW_WARN GHES_PFX + "Failed to read error status block!\n"); return rc; } @@ -702,7 +701,7 @@ static int ghes_proc(struct ghes *ghes) { int rc; - rc = ghes_read_estatus(ghes, 0); + rc = ghes_read_estatus(ghes); if (rc) goto out; @@ -939,7 +938,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) return ret; list_for_each_entry_rcu(ghes, &ghes_nmi, list) { - if (ghes_read_estatus(ghes, 1)) { + if (ghes_read_estatus(ghes)) { ghes_clear_estatus(ghes); continue; } else { From 0ac234be1a9497498e57d958f4251f5257b116b4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:39 +0000 Subject: [PATCH 06/29] ACPI / APEI: Switch estatus pool to use vmalloc memory The ghes code is careful to parse and round firmware's advertised memory requirements for CPER records, up to a maximum of 64K. However when ghes_estatus_pool_expand() does its work, it splits the requested size into PAGE_SIZE granules. This means if firmware generates 5K of CPER records, and correctly describes this in the table, __process_error() will silently fail as it is unable to allocate more than PAGE_SIZE. Switch the estatus pool to vmalloc() memory. On x86 vmalloc() memory may fault and be fixed up by vmalloc_fault(). To prevent this call vmalloc_sync_all() before an NMI handler could discover the memory. Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index f0a704aed040..ee9206d5e119 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -170,40 +170,40 @@ static int ghes_estatus_pool_init(void) return 0; } -static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool, +static void ghes_estatus_pool_free_chunk(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data) { - free_page(chunk->start_addr); + vfree((void *)chunk->start_addr); } static void ghes_estatus_pool_exit(void) { gen_pool_for_each_chunk(ghes_estatus_pool, - ghes_estatus_pool_free_chunk_page, NULL); + ghes_estatus_pool_free_chunk, NULL); gen_pool_destroy(ghes_estatus_pool); } static int ghes_estatus_pool_expand(unsigned long len) { - unsigned long i, pages, size, addr; - int ret; + unsigned long size, addr; ghes_estatus_pool_size_request += PAGE_ALIGN(len); size = gen_pool_size(ghes_estatus_pool); if (size >= ghes_estatus_pool_size_request) return 0; - pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE; - for (i = 0; i < pages; i++) { - addr = __get_free_page(GFP_KERNEL); - if (!addr) - return -ENOMEM; - ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1); - if (ret) - return ret; - } - return 0; + addr = (unsigned long)vmalloc(PAGE_ALIGN(len)); + if (!addr) + return -ENOMEM; + + /* + * New allocation must be visible in all pgd before it can be found by + * an NMI allocating from the pool. + */ + vmalloc_sync_all(); + + return gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); } static int map_gen_v2(struct ghes *ghes) From e147133a42cb9df6cbc99503fdf58d0e6388bf2a Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:40 +0000 Subject: [PATCH 07/29] ACPI / APEI: Make hest.c manage the estatus memory pool ghes.c has a memory pool it uses for the estatus cache and the estatus queue. The cache is initialised when registering the platform driver. For the queue, an NMI-like notification has to grow/shrink the pool as it is registered and unregistered. This is all pretty noisy when adding new NMI-like notifications, it would be better to replace this with a static pool size based on the number of users. As a precursor, move the call that creates the pool from ghes_init(), into hest.c. Later this will take the number of ghes entries and consolidate the queue allocations. Remove ghes_estatus_pool_exit() as hest.c doesn't have anywhere to put this. The pool is now initialised as part of ACPI's subsys_initcall(): (acpi_init(), acpi_scan_init(), acpi_pci_root_init(), acpi_hest_init()) Before this patch it happened later as a GHES specific device_initcall(). Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 33 ++++++--------------------------- drivers/acpi/apei/hest.c | 10 +++++++++- include/acpi/ghes.h | 2 ++ 3 files changed, 17 insertions(+), 28 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index ee9206d5e119..4150c72c78cb 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -162,26 +162,16 @@ static void ghes_iounmap_irq(void) clear_fixmap(FIX_APEI_GHES_IRQ); } -static int ghes_estatus_pool_init(void) +static int ghes_estatus_pool_expand(unsigned long len); //temporary + +int ghes_estatus_pool_init(void) { ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); if (!ghes_estatus_pool) return -ENOMEM; - return 0; -} -static void ghes_estatus_pool_free_chunk(struct gen_pool *pool, - struct gen_pool_chunk *chunk, - void *data) -{ - vfree((void *)chunk->start_addr); -} - -static void ghes_estatus_pool_exit(void) -{ - gen_pool_for_each_chunk(ghes_estatus_pool, - ghes_estatus_pool_free_chunk, NULL); - gen_pool_destroy(ghes_estatus_pool); + return ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * + GHES_ESTATUS_CACHE_ALLOCED_MAX); } static int ghes_estatus_pool_expand(unsigned long len) @@ -1227,18 +1217,9 @@ static int __init ghes_init(void) ghes_nmi_init_cxt(); - rc = ghes_estatus_pool_init(); - if (rc) - goto err; - - rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * - GHES_ESTATUS_CACHE_ALLOCED_MAX); - if (rc) - goto err_pool_exit; - rc = platform_driver_register(&ghes_platform_driver); if (rc) - goto err_pool_exit; + goto err; rc = apei_osc_setup(); if (rc == 0 && osc_sb_apei_support_acked) @@ -1251,8 +1232,6 @@ static int __init ghes_init(void) pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); return 0; -err_pool_exit: - ghes_estatus_pool_exit(); err: return rc; } diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 0935a0ef37e5..e33bfd9d256c 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "apei-internal.h" @@ -209,6 +210,11 @@ static int __init hest_ghes_dev_register(unsigned int ghes_count) rc = apei_hest_parse(hest_parse_ghes, &ghes_arr); if (rc) goto err; + + rc = ghes_estatus_pool_init(); + if (rc) + goto err; + out: kfree(ghes_arr.ghes_devs); return rc; @@ -257,7 +263,9 @@ void __init acpi_hest_init(void) rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); if (rc) goto err; - rc = hest_ghes_dev_register(ghes_count); + + if (ghes_count) + rc = hest_ghes_dev_register(ghes_count); if (rc) goto err; } diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 82cb4eb225a4..46ef5566e052 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -52,6 +52,8 @@ enum { GHES_SEV_PANIC = 0x3, }; +int ghes_estatus_pool_init(void); + /* From drivers/edac/ghes_edac.c */ #ifdef CONFIG_EDAC_GHES From fb7be08f1a091ec243780bfdad4bf0c492057808 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:41 +0000 Subject: [PATCH 08/29] ACPI / APEI: Make estatus pool allocation a static size Adding new NMI-like notifications duplicates the calls that grow and shrink the estatus pool. This is all pretty pointless, as the size is capped to 64K. Allocate this for each ghes and drop the code that grows and shrinks the pool. Suggested-by: Borislav Petkov Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 49 +++++----------------------------------- drivers/acpi/apei/hest.c | 2 +- include/acpi/ghes.h | 2 +- 3 files changed, 8 insertions(+), 45 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 4150c72c78cb..33144ab0661a 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -162,27 +162,18 @@ static void ghes_iounmap_irq(void) clear_fixmap(FIX_APEI_GHES_IRQ); } -static int ghes_estatus_pool_expand(unsigned long len); //temporary - -int ghes_estatus_pool_init(void) +int ghes_estatus_pool_init(int num_ghes) { + unsigned long addr, len; + ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); if (!ghes_estatus_pool) return -ENOMEM; - return ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * - GHES_ESTATUS_CACHE_ALLOCED_MAX); -} - -static int ghes_estatus_pool_expand(unsigned long len) -{ - unsigned long size, addr; - - ghes_estatus_pool_size_request += PAGE_ALIGN(len); - size = gen_pool_size(ghes_estatus_pool); - if (size >= ghes_estatus_pool_size_request) - return 0; + len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX; + len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE); + ghes_estatus_pool_size_request = PAGE_ALIGN(len); addr = (unsigned long)vmalloc(PAGE_ALIGN(len)); if (!addr) return -ENOMEM; @@ -956,32 +947,8 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) return ret; } -static unsigned long ghes_esource_prealloc_size( - const struct acpi_hest_generic *generic) -{ - unsigned long block_length, prealloc_records, prealloc_size; - - block_length = min_t(unsigned long, generic->error_block_length, - GHES_ESTATUS_MAX_SIZE); - prealloc_records = max_t(unsigned long, - generic->records_to_preallocate, 1); - prealloc_size = min_t(unsigned long, block_length * prealloc_records, - GHES_ESOURCE_PREALLOC_MAX_SIZE); - - return prealloc_size; -} - -static void ghes_estatus_pool_shrink(unsigned long len) -{ - ghes_estatus_pool_size_request -= PAGE_ALIGN(len); -} - static void ghes_nmi_add(struct ghes *ghes) { - unsigned long len; - - len = ghes_esource_prealloc_size(ghes->generic); - ghes_estatus_pool_expand(len); mutex_lock(&ghes_list_mutex); if (list_empty(&ghes_nmi)) register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes"); @@ -991,8 +958,6 @@ static void ghes_nmi_add(struct ghes *ghes) static void ghes_nmi_remove(struct ghes *ghes) { - unsigned long len; - mutex_lock(&ghes_list_mutex); list_del_rcu(&ghes->list); if (list_empty(&ghes_nmi)) @@ -1003,8 +968,6 @@ static void ghes_nmi_remove(struct ghes *ghes) * freed after NMI handler finishes. */ synchronize_rcu(); - len = ghes_esource_prealloc_size(ghes->generic); - ghes_estatus_pool_shrink(len); } static void ghes_nmi_init_cxt(void) diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index e33bfd9d256c..8113ddb14d28 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -211,7 +211,7 @@ static int __init hest_ghes_dev_register(unsigned int ghes_count) if (rc) goto err; - rc = ghes_estatus_pool_init(); + rc = ghes_estatus_pool_init(ghes_count); if (rc) goto err; diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 46ef5566e052..cd9ee507d860 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -52,7 +52,7 @@ enum { GHES_SEV_PANIC = 0x3, }; -int ghes_estatus_pool_init(void); +int ghes_estatus_pool_init(int num_ghes); /* From drivers/edac/ghes_edac.c */ From eeb2555779471abdbcc6289a52dc54ce513feaf2 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:42 +0000 Subject: [PATCH 09/29] ACPI / APEI: Don't store CPER records physical address in struct ghes When CPER records are found the address of the records is stashed in the struct ghes. Once the records have been processed, this address is overwritten with zero so that it won't be processed again without being re-populated by firmware. This goes wrong if a struct ghes can be processed concurrently, as can happen at probe time when an NMI occurs. If the NMI arrives on another CPU, the probing CPU may call ghes_clear_estatus() on the records before the handler had finished with them. Even on the same CPU, once the interrupted handler is resumed, it will call ghes_clear_estatus() on the NMIs records, this memory may have already been re-used by firmware. Avoid this stashing by letting the caller hold the address. A later patch will do away with the use of ghes->flags in the read/clear code too. Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 46 +++++++++++++++++++++++----------------- include/acpi/ghes.h | 1 - 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 33144ab0661a..a34f79153b1a 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -305,29 +305,30 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, } } -static int ghes_read_estatus(struct ghes *ghes) +static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr) { struct acpi_hest_generic *g = ghes->generic; - u64 buf_paddr; u32 len; int rc; - rc = apei_read(&buf_paddr, &g->error_status_address); + rc = apei_read(buf_paddr, &g->error_status_address); if (rc) { + *buf_paddr = 0; pr_warn_ratelimited(FW_WARN GHES_PFX "Failed to read error status block address for hardware error source: %d.\n", g->header.source_id); return -EIO; } - if (!buf_paddr) + if (!*buf_paddr) return -ENOENT; - ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, + ghes_copy_tofrom_phys(ghes->estatus, *buf_paddr, sizeof(*ghes->estatus), 1); - if (!ghes->estatus->block_status) + if (!ghes->estatus->block_status) { + *buf_paddr = 0; return -ENOENT; + } - ghes->buffer_paddr = buf_paddr; ghes->flags |= GHES_TO_CLEAR; rc = -EIO; @@ -339,7 +340,7 @@ static int ghes_read_estatus(struct ghes *ghes) if (cper_estatus_check_header(ghes->estatus)) goto err_read_block; ghes_copy_tofrom_phys(ghes->estatus + 1, - buf_paddr + sizeof(*ghes->estatus), + *buf_paddr + sizeof(*ghes->estatus), len - sizeof(*ghes->estatus), 1); if (cper_estatus_check(ghes->estatus)) goto err_read_block; @@ -349,15 +350,20 @@ err_read_block: if (rc) pr_warn_ratelimited(FW_WARN GHES_PFX "Failed to read error status block!\n"); + return rc; } -static void ghes_clear_estatus(struct ghes *ghes) +static void ghes_clear_estatus(struct ghes *ghes, u64 buf_paddr) { ghes->estatus->block_status = 0; if (!(ghes->flags & GHES_TO_CLEAR)) return; - ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr, + + if (!buf_paddr) + return; + + ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, sizeof(ghes->estatus->block_status), 0); ghes->flags &= ~GHES_TO_CLEAR; } @@ -666,11 +672,11 @@ static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2) return apei_write(val, &gv2->read_ack_register); } -static void __ghes_panic(struct ghes *ghes) +static void __ghes_panic(struct ghes *ghes, u64 buf_paddr) { __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); - ghes_clear_estatus(ghes); + ghes_clear_estatus(ghes, buf_paddr); /* reboot to log the error! */ if (!panic_timeout) @@ -680,14 +686,15 @@ static void __ghes_panic(struct ghes *ghes) static int ghes_proc(struct ghes *ghes) { + u64 buf_paddr; int rc; - rc = ghes_read_estatus(ghes); + rc = ghes_read_estatus(ghes, &buf_paddr); if (rc) goto out; if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) { - __ghes_panic(ghes); + __ghes_panic(ghes, buf_paddr); } if (!ghes_estatus_cached(ghes->estatus)) { @@ -697,7 +704,7 @@ static int ghes_proc(struct ghes *ghes) ghes_do_proc(ghes, ghes->estatus); out: - ghes_clear_estatus(ghes); + ghes_clear_estatus(ghes, buf_paddr); if (rc == -ENOENT) return rc; @@ -912,6 +919,7 @@ static void __process_error(struct ghes *ghes) static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) { + u64 buf_paddr; struct ghes *ghes; int sev, ret = NMI_DONE; @@ -919,8 +927,8 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) return ret; list_for_each_entry_rcu(ghes, &ghes_nmi, list) { - if (ghes_read_estatus(ghes)) { - ghes_clear_estatus(ghes); + if (ghes_read_estatus(ghes, &buf_paddr)) { + ghes_clear_estatus(ghes, buf_paddr); continue; } else { ret = NMI_HANDLED; @@ -929,14 +937,14 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) sev = ghes_severity(ghes->estatus->error_severity); if (sev >= GHES_SEV_PANIC) { ghes_print_queued_estatus(); - __ghes_panic(ghes); + __ghes_panic(ghes, buf_paddr); } if (!(ghes->flags & GHES_TO_CLEAR)) continue; __process_error(ghes); - ghes_clear_estatus(ghes); + ghes_clear_estatus(ghes, buf_paddr); } #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index cd9ee507d860..f82f4a7ddd90 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -22,7 +22,6 @@ struct ghes { struct acpi_hest_generic_v2 *generic_v2; }; struct acpi_hest_generic_status *estatus; - u64 buffer_paddr; unsigned long flags; union { struct list_head list; From 7d49f2c75af22f980fd716a13634a16cfb7dd8a7 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:43 +0000 Subject: [PATCH 10/29] ACPI / APEI: Remove spurious GHES_TO_CLEAR check ghes_notify_nmi() checks ghes->flags for GHES_TO_CLEAR before going on to __process_error(). This is pointless as ghes_read_estatus() will always set this flag if it returns success, which was checked earlier in the loop. Remove it. Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index a34f79153b1a..c20e1d0947b1 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -940,9 +940,6 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) __ghes_panic(ghes, buf_paddr); } - if (!(ghes->flags & GHES_TO_CLEAR)) - continue; - __process_error(ghes); ghes_clear_estatus(ghes, buf_paddr); } From 5cc6c68287ae4be22c40b41cf6844746cddebbcc Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:44 +0000 Subject: [PATCH 11/29] ACPI / APEI: Don't update struct ghes' flags in read/clear estatus ghes_read_estatus() sets a flag in struct ghes if the buffer of CPER records needs to be cleared once the records have been processed. This flag value is a problem if a struct ghes can be processed concurrently, as happens at probe time if an NMI arrives for the same error source. The NMI clears the flag, meaning the interrupted handler may never do the ghes_estatus_clear() work. The GHES_TO_CLEAR flags is only set at the same time as buffer_paddr, which is now owned by the caller and passed to ghes_clear_estatus(). Use this value as the flag. A non-zero buf_paddr returned by ghes_read_estatus() means ghes_clear_estatus() should clear this address. ghes_read_estatus() already checks for a read of error_status_address being zero, so CPER records cannot be written here. Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 5 ----- include/acpi/ghes.h | 1 - 2 files changed, 6 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index c20e1d0947b1..af3c10f47f20 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -329,8 +329,6 @@ static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr) return -ENOENT; } - ghes->flags |= GHES_TO_CLEAR; - rc = -EIO; len = cper_estatus_len(ghes->estatus); if (len < sizeof(*ghes->estatus)) @@ -357,15 +355,12 @@ err_read_block: static void ghes_clear_estatus(struct ghes *ghes, u64 buf_paddr) { ghes->estatus->block_status = 0; - if (!(ghes->flags & GHES_TO_CLEAR)) - return; if (!buf_paddr) return; ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, sizeof(ghes->estatus->block_status), 0); - ghes->flags &= ~GHES_TO_CLEAR; } static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev) diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index f82f4a7ddd90..e3f1cddb4ac8 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -13,7 +13,6 @@ * estatus: memory buffer for error status block, allocated during * HEST parsing. */ -#define GHES_TO_CLEAR 0x0001 #define GHES_EXITING 0x0002 struct ghes { From ee2eb3d4ee175c2fb5c7f67e84f5fe40a8147d92 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:45 +0000 Subject: [PATCH 12/29] ACPI / APEI: Generalise the estatus queue's notify code Refactor the estatus queue's pool notification routine from NOTIFY_NMI's handlers. This will allow another notification method to use the estatus queue without duplicating this code. Add rcu_read_lock()/rcu_read_unlock() around the list list_for_each_entry_rcu() walker. These aren't strictly necessary as the whole nmi_enter/nmi_exit() window is a spooky RCU read-side critical section. in_nmi_queue_one_entry() is separate from the rcu-list walker for a later caller that doesn't need to walk a list. Signed-off-by: James Morse Reviewed-by: Punit Agrawal Tested-by: Tyler Baicar [ rjw: Drop unnecessary err variable in two places ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 65 ++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index af3c10f47f20..babd2d53f319 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -912,37 +912,56 @@ static void __process_error(struct ghes *ghes) #endif } -static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) +static int ghes_in_nmi_queue_one_entry(struct ghes *ghes) { u64 buf_paddr; + int sev; + + if (ghes_read_estatus(ghes, &buf_paddr)) { + ghes_clear_estatus(ghes, buf_paddr); + return -ENOENT; + } + + sev = ghes_severity(ghes->estatus->error_severity); + if (sev >= GHES_SEV_PANIC) { + ghes_print_queued_estatus(); + __ghes_panic(ghes, buf_paddr); + } + + __process_error(ghes); + ghes_clear_estatus(ghes, buf_paddr); + + return 0; +} + +static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list) +{ + int ret = -ENOENT; struct ghes *ghes; - int sev, ret = NMI_DONE; + + rcu_read_lock(); + list_for_each_entry_rcu(ghes, rcu_list, list) { + if (!ghes_in_nmi_queue_one_entry(ghes)) + ret = 0; + } + rcu_read_unlock(); + + if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret) + irq_work_queue(&ghes_proc_irq_work); + + return ret; +} + +static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) +{ + int ret = NMI_DONE; if (!atomic_add_unless(&ghes_in_nmi, 1, 1)) return ret; - list_for_each_entry_rcu(ghes, &ghes_nmi, list) { - if (ghes_read_estatus(ghes, &buf_paddr)) { - ghes_clear_estatus(ghes, buf_paddr); - continue; - } else { - ret = NMI_HANDLED; - } + if (!ghes_in_nmi_spool_from_list(&ghes_nmi)) + ret = NMI_HANDLED; - sev = ghes_severity(ghes->estatus->error_severity); - if (sev >= GHES_SEV_PANIC) { - ghes_print_queued_estatus(); - __ghes_panic(ghes, buf_paddr); - } - - __process_error(ghes); - ghes_clear_estatus(ghes, buf_paddr); - } - -#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - if (ret == NMI_HANDLED) - irq_work_queue(&ghes_proc_irq_work); -#endif atomic_dec(&ghes_in_nmi); return ret; } From 06ddeadc8d1c4f704b8956f239263bca75a3add8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:46 +0000 Subject: [PATCH 13/29] ACPI / APEI: Don't allow ghes_ack_error() to mask earlier errors During ghes_proc() we use ghes_ack_error() to tell an external agent we are done with these records and it can re-use the memory. rc may hold an error returned by ghes_read_estatus(), ENOENT causes us to skip ghes_ack_error() (as there is nothing to ack), but rc may also by EIO, which gets supressed. ghes_clear_estatus() is where we mark the records as processed for non GHESv2 error sources, and already spots the ENOENT case as buf_paddr is set to 0 by ghes_read_estatus(). Move the ghes_ack_error() call in here to avoid extra logic with the return code in ghes_proc(). This enables GHESv2 acking for NMI-like error sources. This is safe as the buffer is pre-mapped by map_gen_v2() before the GHES is added to any NMI handler lists. This same pre-mapping step means we can't receive an error from apei_read()/write() here as apei_check_gar() succeeded when it was mapped, and the mapping was cached, so the address can't be rejected at runtime. Remove the error-returns as this is now called from a function with no return. Signed-off-by: James Morse Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 47 +++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index babd2d53f319..72c1dac64ea8 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -197,6 +197,21 @@ static void unmap_gen_v2(struct ghes *ghes) apei_unmap_generic_address(&ghes->generic_v2->read_ack_register); } +static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2) +{ + int rc; + u64 val = 0; + + rc = apei_read(&val, &gv2->read_ack_register); + if (rc) + return; + + val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; + val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; + + apei_write(val, &gv2->read_ack_register); +} + static struct ghes *ghes_new(struct acpi_hest_generic *generic) { struct ghes *ghes; @@ -361,6 +376,13 @@ static void ghes_clear_estatus(struct ghes *ghes, u64 buf_paddr) ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, sizeof(ghes->estatus->block_status), 0); + + /* + * GHESv2 type HEST entries introduce support for error acknowledgment, + * so only acknowledge the error if this support is present. + */ + if (is_hest_type_generic_v2(ghes)) + ghes_ack_error(ghes->generic_v2); } static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev) @@ -652,21 +674,6 @@ static void ghes_estatus_cache_add( rcu_read_unlock(); } -static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2) -{ - int rc; - u64 val = 0; - - rc = apei_read(&val, &gv2->read_ack_register); - if (rc) - return rc; - - val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; - val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; - - return apei_write(val, &gv2->read_ack_register); -} - static void __ghes_panic(struct ghes *ghes, u64 buf_paddr) { __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); @@ -701,16 +708,6 @@ static int ghes_proc(struct ghes *ghes) out: ghes_clear_estatus(ghes, buf_paddr); - if (rc == -ENOENT) - return rc; - - /* - * GHESv2 type HEST entries introduce support for error acknowledgment, - * so only acknowledge the error if this support is present. - */ - if (is_hest_type_generic_v2(ghes)) - return ghes_ack_error(ghes->generic_v2); - return rc; } From 9c9d08051380ad3f6e6376d4383615771c59fd99 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:47 +0000 Subject: [PATCH 14/29] ACPI / APEI: Move NOTIFY_SEA between the estatus-queue and NOTIFY_NMI The estatus-queue code is currently hidden by the NOTIFY_NMI #ifdefs. Once NOTIFY_SEA starts using the estatus-queue we can stop hiding it as each architecture has a user that can't be turned off. Split the existing CONFIG_HAVE_ACPI_APEI_NMI block in two, and move the SEA code into the gap. Move the code around ... and changes the stale comment describing why the status queue is necessary: printk() is no longer the issue, its the helpers like memory_failure_queue() that aren't nmi safe. Signed-off-by: James Morse Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 113 ++++++++++++++++++++------------------- 1 file changed, 59 insertions(+), 54 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 72c1dac64ea8..63dcd25b09c0 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -767,66 +767,21 @@ static struct notifier_block ghes_notifier_hed = { .notifier_call = ghes_notify_hed, }; -#ifdef CONFIG_ACPI_APEI_SEA -static LIST_HEAD(ghes_sea); - -/* - * Return 0 only if one of the SEA error sources successfully reported an error - * record sent from the firmware. - */ -int ghes_notify_sea(void) -{ - struct ghes *ghes; - int ret = -ENOENT; - - rcu_read_lock(); - list_for_each_entry_rcu(ghes, &ghes_sea, list) { - if (!ghes_proc(ghes)) - ret = 0; - } - rcu_read_unlock(); - return ret; -} - -static void ghes_sea_add(struct ghes *ghes) -{ - mutex_lock(&ghes_list_mutex); - list_add_rcu(&ghes->list, &ghes_sea); - mutex_unlock(&ghes_list_mutex); -} - -static void ghes_sea_remove(struct ghes *ghes) -{ - mutex_lock(&ghes_list_mutex); - list_del_rcu(&ghes->list); - mutex_unlock(&ghes_list_mutex); - synchronize_rcu(); -} -#else /* CONFIG_ACPI_APEI_SEA */ -static inline void ghes_sea_add(struct ghes *ghes) { } -static inline void ghes_sea_remove(struct ghes *ghes) { } -#endif /* CONFIG_ACPI_APEI_SEA */ - #ifdef CONFIG_HAVE_ACPI_APEI_NMI /* - * printk is not safe in NMI context. So in NMI handler, we allocate - * required memory from lock-less memory allocator - * (ghes_estatus_pool), save estatus into it, put them into lock-less - * list (ghes_estatus_llist), then delay printk into IRQ context via - * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record - * required pool size by all NMI error source. + * Handlers for CPER records may not be NMI safe. For example, + * memory_failure_queue() takes spinlocks and calls schedule_work_on(). + * In any NMI-like handler, memory from ghes_estatus_pool is used to save + * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes + * ghes_proc_in_irq() to run in IRQ context where each estatus in + * ghes_estatus_llist is processed. + * + * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache + * to suppress frequent messages. */ static struct llist_head ghes_estatus_llist; static struct irq_work ghes_proc_irq_work; -/* - * NMI may be triggered on any CPU, so ghes_in_nmi is used for - * having only one concurrent reader. - */ -static atomic_t ghes_in_nmi = ATOMIC_INIT(0); - -static LIST_HEAD(ghes_nmi); - static void ghes_proc_in_irq(struct irq_work *irq_work) { struct llist_node *llnode, *next; @@ -948,6 +903,56 @@ static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list) return ret; } +#endif /* CONFIG_HAVE_ACPI_APEI_NMI */ + +#ifdef CONFIG_ACPI_APEI_SEA +static LIST_HEAD(ghes_sea); + +/* + * Return 0 only if one of the SEA error sources successfully reported an error + * record sent from the firmware. + */ +int ghes_notify_sea(void) +{ + struct ghes *ghes; + int ret = -ENOENT; + + rcu_read_lock(); + list_for_each_entry_rcu(ghes, &ghes_sea, list) { + if (!ghes_proc(ghes)) + ret = 0; + } + rcu_read_unlock(); + return ret; +} + +static void ghes_sea_add(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + list_add_rcu(&ghes->list, &ghes_sea); + mutex_unlock(&ghes_list_mutex); +} + +static void ghes_sea_remove(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + list_del_rcu(&ghes->list); + mutex_unlock(&ghes_list_mutex); + synchronize_rcu(); +} +#else /* CONFIG_ACPI_APEI_SEA */ +static inline void ghes_sea_add(struct ghes *ghes) { } +static inline void ghes_sea_remove(struct ghes *ghes) { } +#endif /* CONFIG_ACPI_APEI_SEA */ + +#ifdef CONFIG_HAVE_ACPI_APEI_NMI +/* + * NMI may be triggered on any CPU, so ghes_in_nmi is used for + * having only one concurrent reader. + */ +static atomic_t ghes_in_nmi = ATOMIC_INIT(0); + +static LIST_HEAD(ghes_nmi); static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) { From 255097c82d821bb2bb18e9c7011841ee7342840f Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:48 +0000 Subject: [PATCH 15/29] ACPI / APEI: Switch NOTIFY_SEA to use the estatus queue Now that the estatus queue can be used by more than one notification method, we can move notifications that have NMI-like behaviour over. Switch NOTIFY_SEA over to use the estatus queue. This makes it behave in the same way as x86's NOTIFY_NMI. Remove Kconfig's ability to turn ACPI_APEI_SEA off if ACPI_APEI_GHES is selected. This roughly matches the x86 NOTIFY_NMI behaviour, and means each architecture has at least one user of the estatus-queue, meaning it doesn't need guarding with ifdef. Signed-off-by: James Morse Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/Kconfig | 12 +----------- drivers/acpi/apei/ghes.c | 22 +++++----------------- 2 files changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 52ae5438edeb..6b18f8bc7be3 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -41,19 +41,9 @@ config ACPI_APEI_PCIEAER Turn on this option to enable the corresponding support. config ACPI_APEI_SEA - bool "APEI Synchronous External Abort logging/recovering support" + bool depends on ARM64 && ACPI_APEI_GHES default y - help - This option should be enabled if the system supports - firmware first handling of SEA (Synchronous External Abort). - SEA happens with certain faults of data abort or instruction - abort synchronous exceptions on ARMv8 systems. If a system - supports firmware first handling of SEA, the platform analyzes - and handles hardware error notifications from SEA, and it may then - form a HW error record for the OS to parse and handle. This - option allows the OS to look for such hardware error record, and - take appropriate action. config ACPI_APEI_MEMORY_FAILURE bool "APEI memory error recovering support" diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 63dcd25b09c0..679ab0f93ee8 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -767,7 +767,6 @@ static struct notifier_block ghes_notifier_hed = { .notifier_call = ghes_notify_hed, }; -#ifdef CONFIG_HAVE_ACPI_APEI_NMI /* * Handlers for CPER records may not be NMI safe. For example, * memory_failure_queue() takes spinlocks and calls schedule_work_on(). @@ -903,7 +902,6 @@ static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list) return ret; } -#endif /* CONFIG_HAVE_ACPI_APEI_NMI */ #ifdef CONFIG_ACPI_APEI_SEA static LIST_HEAD(ghes_sea); @@ -914,16 +912,7 @@ static LIST_HEAD(ghes_sea); */ int ghes_notify_sea(void) { - struct ghes *ghes; - int ret = -ENOENT; - - rcu_read_lock(); - list_for_each_entry_rcu(ghes, &ghes_sea, list) { - if (!ghes_proc(ghes)) - ret = 0; - } - rcu_read_unlock(); - return ret; + return ghes_in_nmi_spool_from_list(&ghes_sea); } static void ghes_sea_add(struct ghes *ghes) @@ -990,16 +979,15 @@ static void ghes_nmi_remove(struct ghes *ghes) */ synchronize_rcu(); } +#else /* CONFIG_HAVE_ACPI_APEI_NMI */ +static inline void ghes_nmi_add(struct ghes *ghes) { } +static inline void ghes_nmi_remove(struct ghes *ghes) { } +#endif /* CONFIG_HAVE_ACPI_APEI_NMI */ static void ghes_nmi_init_cxt(void) { init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); } -#else /* CONFIG_HAVE_ACPI_APEI_NMI */ -static inline void ghes_nmi_add(struct ghes *ghes) { } -static inline void ghes_nmi_remove(struct ghes *ghes) { } -static inline void ghes_nmi_init_cxt(void) { } -#endif /* CONFIG_HAVE_ACPI_APEI_NMI */ static int ghes_probe(struct platform_device *ghes_dev) { From 0db5e0223035b2c84e6186831fc27511270af812 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:49 +0000 Subject: [PATCH 16/29] KVM: arm/arm64: Add kvm_ras.h to collect kvm specific RAS plumbing To split up APEIs in_nmi() path, the caller needs to always be in_nmi(). KVM shouldn't have to know about this, pull the RAS plumbing out into a header file. Currently guest synchronous external aborts are claimed as RAS notifications by handle_guest_sea(), which is hidden in the arch codes mm/fault.c. 32bit gets a dummy declaration in system_misc.h. There is going to be more of this in the future if/when the kernel supports the SError-based firmware-first notification mechanism and/or kernel-first notifications for both synchronous external abort and SError. Each of these will come with some Kconfig symbols and a handful of header files. Create a header file for all this. This patch gives handle_guest_sea() a 'kvm_' prefix, and moves the declarations to kvm_ras.h as preparation for a future patch that moves the ACPI-specific RAS code out of mm/fault.c. Signed-off-by: James Morse Reviewed-by: Punit Agrawal Acked-by: Marc Zyngier Tested-by: Tyler Baicar Acked-by: Catalin Marinas Signed-off-by: Rafael J. Wysocki --- arch/arm/include/asm/kvm_ras.h | 14 ++++++++++++++ arch/arm/include/asm/system_misc.h | 5 ----- arch/arm64/include/asm/kvm_ras.h | 11 +++++++++++ arch/arm64/include/asm/system_misc.h | 2 -- arch/arm64/mm/fault.c | 2 +- virt/kvm/arm/mmu.c | 4 ++-- 6 files changed, 28 insertions(+), 10 deletions(-) create mode 100644 arch/arm/include/asm/kvm_ras.h create mode 100644 arch/arm64/include/asm/kvm_ras.h diff --git a/arch/arm/include/asm/kvm_ras.h b/arch/arm/include/asm/kvm_ras.h new file mode 100644 index 000000000000..e9577292dfe4 --- /dev/null +++ b/arch/arm/include/asm/kvm_ras.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018 - Arm Ltd */ + +#ifndef __ARM_KVM_RAS_H__ +#define __ARM_KVM_RAS_H__ + +#include + +static inline int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr) +{ + return -1; +} + +#endif /* __ARM_KVM_RAS_H__ */ diff --git a/arch/arm/include/asm/system_misc.h b/arch/arm/include/asm/system_misc.h index 8e76db83c498..66f6a3ae68d2 100644 --- a/arch/arm/include/asm/system_misc.h +++ b/arch/arm/include/asm/system_misc.h @@ -38,11 +38,6 @@ static inline void harden_branch_predictor(void) extern unsigned int user_debug; -static inline int handle_guest_sea(phys_addr_t addr, unsigned int esr) -{ - return -1; -} - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_ARM_SYSTEM_MISC_H */ diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h new file mode 100644 index 000000000000..6096f0251812 --- /dev/null +++ b/arch/arm64/include/asm/kvm_ras.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018 - Arm Ltd */ + +#ifndef __ARM64_KVM_RAS_H__ +#define __ARM64_KVM_RAS_H__ + +#include + +int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr); + +#endif /* __ARM64_KVM_RAS_H__ */ diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 0e2a0ecaf484..32693f34f431 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -46,8 +46,6 @@ extern void __show_regs(struct pt_regs *); extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); -int handle_guest_sea(phys_addr_t addr, unsigned int esr); - #endif /* __ASSEMBLY__ */ #endif /* __ASM_SYSTEM_MISC_H */ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index efb7b2cbead5..c76dc981e3fc 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -733,7 +733,7 @@ static const struct fault_info fault_info[] = { { do_bad, SIGKILL, SI_KERNEL, "unknown 63" }, }; -int handle_guest_sea(phys_addr_t addr, unsigned int esr) +int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr) { return ghes_notify_sea(); } diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index fbdf3ac2f001..600e0cc74ea4 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -27,10 +27,10 @@ #include #include #include +#include #include #include #include -#include #include "trace.h" @@ -1903,7 +1903,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) * For RAS the host kernel may handle this abort. * There is no need to pass the error into the guest. */ - if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) + if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) return 1; if (unlikely(!is_iabt)) { From d44f1b8dd7e66d80cc4205809e5ace866bd851da Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:50 +0000 Subject: [PATCH 17/29] arm64: KVM/mm: Move SEA handling behind a single 'claim' interface To split up APEIs in_nmi() path, the caller needs to always be in_nmi(). Add a helper to do the work and claim the notification. When KVM or the arch code takes an exception that might be a RAS notification, it asks the APEI firmware-first code whether it wants to claim the exception. A future kernel-first mechanism may be queried afterwards, and claim the notification, otherwise we fall through to the existing default behaviour. The NOTIFY_SEA code was merged before considering multiple, possibly interacting, NMI-like notifications and the need to consider kernel first in the future. Make the 'claiming' behaviour explicit. Restructuring the APEI code to allow multiple NMI-like notifications means any notification that might interrupt interrupts-masked code must always be wrapped in nmi_enter()/nmi_exit(). This will allow APEI to use in_nmi() to use the right fixmap entries. Mask SError over this window to prevent an asynchronous RAS error arriving and tripping 'nmi_enter()'s BUG_ON(in_nmi()). Signed-off-by: James Morse Acked-by: Marc Zyngier Tested-by: Tyler Baicar Acked-by: Catalin Marinas Signed-off-by: Rafael J. Wysocki --- arch/arm64/include/asm/acpi.h | 4 +++- arch/arm64/include/asm/daifflags.h | 1 + arch/arm64/include/asm/kvm_ras.h | 16 ++++++++++++++- arch/arm64/kernel/acpi.c | 31 ++++++++++++++++++++++++++++++ arch/arm64/mm/fault.c | 24 +++++------------------ 5 files changed, 55 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 2def77ec14be..7628efbe6c12 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -110,9 +111,10 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu) static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); - +int apei_claim_sea(struct pt_regs *regs); #else static inline void acpi_init_cpus(void) { } +static inline int apei_claim_sea(struct pt_regs *regs) { return -ENOENT; } #endif /* CONFIG_ACPI */ #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 8d91f2233135..fa90779fc752 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -20,6 +20,7 @@ #define DAIF_PROCCTX 0 #define DAIF_PROCCTX_NOIRQ PSR_I_BIT +#define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT) /* mask/save/unmask/restore all exceptions, including interrupts. */ static inline void local_daif_mask(void) diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h index 6096f0251812..8ac6ee77437c 100644 --- a/arch/arm64/include/asm/kvm_ras.h +++ b/arch/arm64/include/asm/kvm_ras.h @@ -4,8 +4,22 @@ #ifndef __ARM64_KVM_RAS_H__ #define __ARM64_KVM_RAS_H__ +#include +#include #include -int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr); +#include + +/* + * Was this synchronous external abort a RAS notification? + * Returns '0' for errors handled by some RAS subsystem, or -ENOENT. + */ +static inline int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr) +{ + /* apei_claim_sea(NULL) expects to mask interrupts itself */ + lockdep_assert_irqs_enabled(); + + return apei_claim_sea(NULL); +} #endif /* __ARM64_KVM_RAS_H__ */ diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 44e3c351e1ea..803f0494dd3e 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -27,8 +27,10 @@ #include #include +#include #include #include +#include #include #include @@ -256,3 +258,32 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) return __pgprot(PROT_NORMAL_NC); return __pgprot(PROT_DEVICE_nGnRnE); } + +/* + * Claim Synchronous External Aborts as a firmware first notification. + * + * Used by KVM and the arch do_sea handler. + * @regs may be NULL when called from process context. + */ +int apei_claim_sea(struct pt_regs *regs) +{ + int err = -ENOENT; + unsigned long current_flags; + + if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) + return err; + + current_flags = arch_local_save_flags(); + + /* + * SEA can interrupt SError, mask it and describe this as an NMI so + * that APEI defers the handling. + */ + local_daif_restore(DAIF_ERRCTX); + nmi_enter(); + err = ghes_notify_sea(); + nmi_exit(); + local_daif_restore(current_flags); + + return err; +} diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c76dc981e3fc..e1c84c2e1cab 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -18,6 +18,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -33,6 +34,7 @@ #include #include +#include #include #include #include @@ -47,8 +49,6 @@ #include #include -#include - struct fault_info { int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); @@ -643,19 +643,10 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) inf = esr_to_fault_info(esr); /* - * Synchronous aborts may interrupt code which had interrupts masked. - * Before calling out into the wider kernel tell the interested - * subsystems. + * Return value ignored as we rely on signal merging. + * Future patches will make this more robust. */ - if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { - if (interrupts_enabled(regs)) - nmi_enter(); - - ghes_notify_sea(); - - if (interrupts_enabled(regs)) - nmi_exit(); - } + apei_claim_sea(regs); if (esr & ESR_ELx_FnV) siaddr = NULL; @@ -733,11 +724,6 @@ static const struct fault_info fault_info[] = { { do_bad, SIGKILL, SI_KERNEL, "unknown 63" }, }; -int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr) -{ - return ghes_notify_sea(); -} - asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { From 3b880cbe4df5dd78a2b2279dbe16db9d193412ca Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:51 +0000 Subject: [PATCH 18/29] ACPI / APEI: Move locking to the notification helper ghes_copy_tofrom_phys() takes different locks depending on in_nmi(). This doesn't work if there are multiple NMI-like notifications, that can interrupt each other. Now that NOTIFY_SEA is always called in the same context, move the lock-taking to the notification helper. The helper will always know which lock to take. This avoids ghes_copy_tofrom_phys() taking a guess based on in_nmi(). This splits NOTIFY_NMI and NOTIFY_SEA to use different locks. All the other notifications use ghes_proc(), and are called in process or IRQ context. Move the spin_lock_irqsave() around their ghes_proc() calls. Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 679ab0f93ee8..c7dbdb915e7e 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -114,11 +114,10 @@ static DEFINE_MUTEX(ghes_list_mutex); * handler, but general ioremap can not be used in atomic context, so * the fixmap is used instead. * - * These 2 spinlocks are used to prevent the fixmap entries from being used + * This spinlock is used to prevent the fixmap entry from being used * simultaneously. */ -static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); -static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); +static DEFINE_SPINLOCK(ghes_notify_lock_irq); static struct gen_pool *ghes_estatus_pool; static unsigned long ghes_estatus_pool_size_request; @@ -287,7 +286,6 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, int from_phys) { void __iomem *vaddr; - unsigned long flags = 0; int in_nmi = in_nmi(); u64 offset; u32 trunk; @@ -295,10 +293,8 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, while (len > 0) { offset = paddr - (paddr & PAGE_MASK); if (in_nmi) { - raw_spin_lock(&ghes_ioremap_lock_nmi); vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT); } else { - spin_lock_irqsave(&ghes_ioremap_lock_irq, flags); vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT); } trunk = PAGE_SIZE - offset; @@ -312,10 +308,8 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, buffer += trunk; if (in_nmi) { ghes_iounmap_nmi(); - raw_spin_unlock(&ghes_ioremap_lock_nmi); } else { ghes_iounmap_irq(); - spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags); } } } @@ -729,8 +723,11 @@ static void ghes_add_timer(struct ghes *ghes) static void ghes_poll_func(struct timer_list *t) { struct ghes *ghes = from_timer(ghes, t, timer); + unsigned long flags; + spin_lock_irqsave(&ghes_notify_lock_irq, flags); ghes_proc(ghes); + spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); if (!(ghes->flags & GHES_EXITING)) ghes_add_timer(ghes); } @@ -738,9 +735,12 @@ static void ghes_poll_func(struct timer_list *t) static irqreturn_t ghes_irq_func(int irq, void *data) { struct ghes *ghes = data; + unsigned long flags; int rc; + spin_lock_irqsave(&ghes_notify_lock_irq, flags); rc = ghes_proc(ghes); + spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); if (rc) return IRQ_NONE; @@ -751,14 +751,17 @@ static int ghes_notify_hed(struct notifier_block *this, unsigned long event, void *data) { struct ghes *ghes; + unsigned long flags; int ret = NOTIFY_DONE; + spin_lock_irqsave(&ghes_notify_lock_irq, flags); rcu_read_lock(); list_for_each_entry_rcu(ghes, &ghes_hed, list) { if (!ghes_proc(ghes)) ret = NOTIFY_OK; } rcu_read_unlock(); + spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); return ret; } @@ -912,7 +915,14 @@ static LIST_HEAD(ghes_sea); */ int ghes_notify_sea(void) { - return ghes_in_nmi_spool_from_list(&ghes_sea); + static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea); + int rv; + + raw_spin_lock(&ghes_notify_lock_sea); + rv = ghes_in_nmi_spool_from_list(&ghes_sea); + raw_spin_unlock(&ghes_notify_lock_sea); + + return rv; } static void ghes_sea_add(struct ghes *ghes) @@ -945,13 +955,16 @@ static LIST_HEAD(ghes_nmi); static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) { + static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi); int ret = NMI_DONE; if (!atomic_add_unless(&ghes_in_nmi, 1, 1)) return ret; + raw_spin_lock(&ghes_notify_lock_nmi); if (!ghes_in_nmi_spool_from_list(&ghes_nmi)) ret = NMI_HANDLED; + raw_spin_unlock(&ghes_notify_lock_nmi); atomic_dec(&ghes_in_nmi); return ret; @@ -993,6 +1006,7 @@ static int ghes_probe(struct platform_device *ghes_dev) { struct acpi_hest_generic *generic; struct ghes *ghes = NULL; + unsigned long flags; int rc = -EINVAL; @@ -1095,7 +1109,9 @@ static int ghes_probe(struct platform_device *ghes_dev) ghes_edac_register(ghes, &ghes_dev->dev); /* Handle any pending errors right away */ + spin_lock_irqsave(&ghes_notify_lock_irq, flags); ghes_proc(ghes); + spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); return 0; From b484079b9f520cc9a0797d885f1cd7f64b72b1b2 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:52 +0000 Subject: [PATCH 19/29] ACPI / APEI: Let the notification helper specify the fixmap slot ghes_copy_tofrom_phys() uses a different fixmap slot depending on in_nmi(). This doesn't work when there are multiple NMI-like notifications, that could interrupt each other. As with the locking, move the chosen fixmap_idx to the notification helper. This only matters for NMI-like notifications, anything calling ghes_proc() can use the IRQ fixmap slot as its already holding an irqsave spinlock. This lets us collapse the ghes_ioremap_pfn_*() helpers. Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 92 +++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 53 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index c7dbdb915e7e..9a24641adfba 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -127,38 +128,24 @@ static atomic_t ghes_estatus_cache_alloced; static int ghes_panic_timeout __read_mostly = 30; -static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn) +static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx) { phys_addr_t paddr; pgprot_t prot; - paddr = pfn << PAGE_SHIFT; + paddr = PFN_PHYS(pfn); prot = arch_apei_get_mem_attribute(paddr); - __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot); + __set_fixmap(fixmap_idx, paddr, prot); - return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI); + return (void __iomem *) __fix_to_virt(fixmap_idx); } -static void __iomem *ghes_ioremap_pfn_irq(u64 pfn) +static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx) { - phys_addr_t paddr; - pgprot_t prot; + int _idx = virt_to_fix((unsigned long)vaddr); - paddr = pfn << PAGE_SHIFT; - prot = arch_apei_get_mem_attribute(paddr); - __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot); - - return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ); -} - -static void ghes_iounmap_nmi(void) -{ - clear_fixmap(FIX_APEI_GHES_NMI); -} - -static void ghes_iounmap_irq(void) -{ - clear_fixmap(FIX_APEI_GHES_IRQ); + WARN_ON_ONCE(fixmap_idx != _idx); + clear_fixmap(fixmap_idx); } int ghes_estatus_pool_init(int num_ghes) @@ -283,20 +270,16 @@ static inline int ghes_severity(int severity) } static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, - int from_phys) + int from_phys, + enum fixed_addresses fixmap_idx) { void __iomem *vaddr; - int in_nmi = in_nmi(); u64 offset; u32 trunk; while (len > 0) { offset = paddr - (paddr & PAGE_MASK); - if (in_nmi) { - vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT); - } else { - vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT); - } + vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx); trunk = PAGE_SIZE - offset; trunk = min(trunk, len); if (from_phys) @@ -306,15 +289,13 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, len -= trunk; paddr += trunk; buffer += trunk; - if (in_nmi) { - ghes_iounmap_nmi(); - } else { - ghes_iounmap_irq(); - } + ghes_unmap(vaddr, fixmap_idx); } } -static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr) +static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr, + enum fixed_addresses fixmap_idx) + { struct acpi_hest_generic *g = ghes->generic; u32 len; @@ -332,7 +313,7 @@ static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr) return -ENOENT; ghes_copy_tofrom_phys(ghes->estatus, *buf_paddr, - sizeof(*ghes->estatus), 1); + sizeof(*ghes->estatus), 1, fixmap_idx); if (!ghes->estatus->block_status) { *buf_paddr = 0; return -ENOENT; @@ -348,7 +329,7 @@ static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr) goto err_read_block; ghes_copy_tofrom_phys(ghes->estatus + 1, *buf_paddr + sizeof(*ghes->estatus), - len - sizeof(*ghes->estatus), 1); + len - sizeof(*ghes->estatus), 1, fixmap_idx); if (cper_estatus_check(ghes->estatus)) goto err_read_block; rc = 0; @@ -361,7 +342,8 @@ err_read_block: return rc; } -static void ghes_clear_estatus(struct ghes *ghes, u64 buf_paddr) +static void ghes_clear_estatus(struct ghes *ghes, u64 buf_paddr, + enum fixed_addresses fixmap_idx) { ghes->estatus->block_status = 0; @@ -369,7 +351,8 @@ static void ghes_clear_estatus(struct ghes *ghes, u64 buf_paddr) return; ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, - sizeof(ghes->estatus->block_status), 0); + sizeof(ghes->estatus->block_status), 0, + fixmap_idx); /* * GHESv2 type HEST entries introduce support for error acknowledgment, @@ -668,11 +651,12 @@ static void ghes_estatus_cache_add( rcu_read_unlock(); } -static void __ghes_panic(struct ghes *ghes, u64 buf_paddr) +static void __ghes_panic(struct ghes *ghes, u64 buf_paddr, + enum fixed_addresses fixmap_idx) { __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); - ghes_clear_estatus(ghes, buf_paddr); + ghes_clear_estatus(ghes, buf_paddr, fixmap_idx); /* reboot to log the error! */ if (!panic_timeout) @@ -685,12 +669,12 @@ static int ghes_proc(struct ghes *ghes) u64 buf_paddr; int rc; - rc = ghes_read_estatus(ghes, &buf_paddr); + rc = ghes_read_estatus(ghes, &buf_paddr, FIX_APEI_GHES_IRQ); if (rc) goto out; if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) { - __ghes_panic(ghes, buf_paddr); + __ghes_panic(ghes, buf_paddr, FIX_APEI_GHES_IRQ); } if (!ghes_estatus_cached(ghes->estatus)) { @@ -700,7 +684,7 @@ static int ghes_proc(struct ghes *ghes) ghes_do_proc(ghes, ghes->estatus); out: - ghes_clear_estatus(ghes, buf_paddr); + ghes_clear_estatus(ghes, buf_paddr, FIX_APEI_GHES_IRQ); return rc; } @@ -866,36 +850,38 @@ static void __process_error(struct ghes *ghes) #endif } -static int ghes_in_nmi_queue_one_entry(struct ghes *ghes) +static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, + enum fixed_addresses fixmap_idx) { u64 buf_paddr; int sev; - if (ghes_read_estatus(ghes, &buf_paddr)) { - ghes_clear_estatus(ghes, buf_paddr); + if (ghes_read_estatus(ghes, &buf_paddr, fixmap_idx)) { + ghes_clear_estatus(ghes, buf_paddr, fixmap_idx); return -ENOENT; } sev = ghes_severity(ghes->estatus->error_severity); if (sev >= GHES_SEV_PANIC) { ghes_print_queued_estatus(); - __ghes_panic(ghes, buf_paddr); + __ghes_panic(ghes, buf_paddr, fixmap_idx); } __process_error(ghes); - ghes_clear_estatus(ghes, buf_paddr); + ghes_clear_estatus(ghes, buf_paddr, fixmap_idx); return 0; } -static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list) +static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list, + enum fixed_addresses fixmap_idx) { int ret = -ENOENT; struct ghes *ghes; rcu_read_lock(); list_for_each_entry_rcu(ghes, rcu_list, list) { - if (!ghes_in_nmi_queue_one_entry(ghes)) + if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) ret = 0; } rcu_read_unlock(); @@ -919,7 +905,7 @@ int ghes_notify_sea(void) int rv; raw_spin_lock(&ghes_notify_lock_sea); - rv = ghes_in_nmi_spool_from_list(&ghes_sea); + rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_NMI); raw_spin_unlock(&ghes_notify_lock_sea); return rv; @@ -962,7 +948,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) return ret; raw_spin_lock(&ghes_notify_lock_nmi); - if (!ghes_in_nmi_spool_from_list(&ghes_nmi)) + if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI)) ret = NMI_HANDLED; raw_spin_unlock(&ghes_notify_lock_nmi); From f2a7e059aa7a6a22a6f4612f31ee29e726a3bfd0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:53 +0000 Subject: [PATCH 20/29] ACPI / APEI: Pass ghes and estatus separately to avoid a later copy The NMI-like notifications scribble over ghes->estatus, before copying it somewhere else. If this interrupts the ghes_probe() code calling ghes_proc() on each struct ghes, the data is corrupted. All the NMI-like notifications should use a queued estatus entry from the beginning, instead of the ghes version, then copying it. To do this, break up any use of "ghes->estatus" so that all functions take the estatus as an argument. This patch just moves these ghes->estatus dereferences into separate arguments, no change in behaviour. struct ghes becomes unused in ghes_clear_estatus() as it only wanted ghes->estatus, which we now pass directly. This is removed. Signed-off-by: James Morse Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 92 +++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 9a24641adfba..54746a791f3f 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -293,9 +293,9 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, } } -static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr, - enum fixed_addresses fixmap_idx) - +static int ghes_read_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 *buf_paddr, enum fixed_addresses fixmap_idx) { struct acpi_hest_generic *g = ghes->generic; u32 len; @@ -312,25 +312,25 @@ static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr, if (!*buf_paddr) return -ENOENT; - ghes_copy_tofrom_phys(ghes->estatus, *buf_paddr, - sizeof(*ghes->estatus), 1, fixmap_idx); - if (!ghes->estatus->block_status) { + ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1, + fixmap_idx); + if (!estatus->block_status) { *buf_paddr = 0; return -ENOENT; } rc = -EIO; - len = cper_estatus_len(ghes->estatus); - if (len < sizeof(*ghes->estatus)) + len = cper_estatus_len(estatus); + if (len < sizeof(*estatus)) goto err_read_block; if (len > ghes->generic->error_block_length) goto err_read_block; - if (cper_estatus_check_header(ghes->estatus)) + if (cper_estatus_check_header(estatus)) goto err_read_block; - ghes_copy_tofrom_phys(ghes->estatus + 1, - *buf_paddr + sizeof(*ghes->estatus), - len - sizeof(*ghes->estatus), 1, fixmap_idx); - if (cper_estatus_check(ghes->estatus)) + ghes_copy_tofrom_phys(estatus + 1, + *buf_paddr + sizeof(*estatus), + len - sizeof(*estatus), 1, fixmap_idx); + if (cper_estatus_check(estatus)) goto err_read_block; rc = 0; @@ -342,16 +342,17 @@ err_read_block: return rc; } -static void ghes_clear_estatus(struct ghes *ghes, u64 buf_paddr, - enum fixed_addresses fixmap_idx) +static void ghes_clear_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 buf_paddr, enum fixed_addresses fixmap_idx) { - ghes->estatus->block_status = 0; + estatus->block_status = 0; if (!buf_paddr) return; - ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, - sizeof(ghes->estatus->block_status), 0, + ghes_copy_tofrom_phys(estatus, buf_paddr, + sizeof(estatus->block_status), 0, fixmap_idx); /* @@ -651,12 +652,13 @@ static void ghes_estatus_cache_add( rcu_read_unlock(); } -static void __ghes_panic(struct ghes *ghes, u64 buf_paddr, - enum fixed_addresses fixmap_idx) +static void __ghes_panic(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 buf_paddr, enum fixed_addresses fixmap_idx) { - __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); + __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus); - ghes_clear_estatus(ghes, buf_paddr, fixmap_idx); + ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); /* reboot to log the error! */ if (!panic_timeout) @@ -666,25 +668,25 @@ static void __ghes_panic(struct ghes *ghes, u64 buf_paddr, static int ghes_proc(struct ghes *ghes) { + struct acpi_hest_generic_status *estatus = ghes->estatus; u64 buf_paddr; int rc; - rc = ghes_read_estatus(ghes, &buf_paddr, FIX_APEI_GHES_IRQ); + rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ); if (rc) goto out; - if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) { - __ghes_panic(ghes, buf_paddr, FIX_APEI_GHES_IRQ); - } + if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC) + __ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); - if (!ghes_estatus_cached(ghes->estatus)) { - if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) - ghes_estatus_cache_add(ghes->generic, ghes->estatus); + if (!ghes_estatus_cached(estatus)) { + if (ghes_print_estatus(NULL, ghes->generic, estatus)) + ghes_estatus_cache_add(ghes->generic, estatus); } - ghes_do_proc(ghes, ghes->estatus); + ghes_do_proc(ghes, estatus); out: - ghes_clear_estatus(ghes, buf_paddr, FIX_APEI_GHES_IRQ); + ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); return rc; } @@ -825,17 +827,20 @@ static void ghes_print_queued_estatus(void) } /* Save estatus for further processing in IRQ context */ -static void __process_error(struct ghes *ghes) +static void __process_error(struct ghes *ghes, + struct acpi_hest_generic_status *src_estatus) { -#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG u32 len, node_len; struct ghes_estatus_node *estatus_node; struct acpi_hest_generic_status *estatus; - if (ghes_estatus_cached(ghes->estatus)) + if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)) return; - len = cper_estatus_len(ghes->estatus); + if (ghes_estatus_cached(src_estatus)) + return; + + len = cper_estatus_len(src_estatus); node_len = GHES_ESTATUS_NODE_LEN(len); estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); @@ -845,30 +850,31 @@ static void __process_error(struct ghes *ghes) estatus_node->ghes = ghes; estatus_node->generic = ghes->generic; estatus = GHES_ESTATUS_FROM_NODE(estatus_node); - memcpy(estatus, ghes->estatus, len); + memcpy(estatus, src_estatus, len); llist_add(&estatus_node->llnode, &ghes_estatus_llist); -#endif } static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, enum fixed_addresses fixmap_idx) { + struct acpi_hest_generic_status *estatus = ghes->estatus; u64 buf_paddr; int sev; - if (ghes_read_estatus(ghes, &buf_paddr, fixmap_idx)) { - ghes_clear_estatus(ghes, buf_paddr, fixmap_idx); + if (ghes_read_estatus(ghes, estatus, &buf_paddr, fixmap_idx)) { + ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); return -ENOENT; } - sev = ghes_severity(ghes->estatus->error_severity); + sev = ghes_severity(estatus->error_severity); if (sev >= GHES_SEV_PANIC) { ghes_print_queued_estatus(); - __ghes_panic(ghes, buf_paddr, fixmap_idx); + __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx); + } - __process_error(ghes); - ghes_clear_estatus(ghes, buf_paddr, fixmap_idx); + __process_error(ghes, estatus); + ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); return 0; } From f2a681b9160b9c80826b3062e71371cfc82b4863 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:54 +0000 Subject: [PATCH 21/29] ACPI / APEI: Make GHES estatus header validation more user friendly ghes_read_estatus() checks various lengths in the top-level header to ensure the CPER records to be read aren't obviously corrupt. Take the opportunity to make this more user-friendly, printing a (ratelimited) message about the nature of the header format error. Suggested-by: Borislav Petkov Signed-off-by: James Morse [ rjw: Add missing 'static' ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 46 ++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 54746a791f3f..4fb274bb209e 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -293,6 +293,30 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, } } +/* Check the top-level record header has an appropriate size. */ +static int __ghes_check_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus) +{ + u32 len = cper_estatus_len(estatus); + + if (len < sizeof(*estatus)) { + pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n"); + return -EIO; + } + + if (len > ghes->generic->error_block_length) { + pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n"); + return -EIO; + } + + if (cper_estatus_check_header(estatus)) { + pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n"); + return -EIO; + } + + return 0; +} + static int ghes_read_estatus(struct ghes *ghes, struct acpi_hest_generic_status *estatus, u64 *buf_paddr, enum fixed_addresses fixmap_idx) @@ -319,27 +343,21 @@ static int ghes_read_estatus(struct ghes *ghes, return -ENOENT; } - rc = -EIO; + rc = __ghes_check_estatus(ghes, estatus); + if (rc) + return rc; + len = cper_estatus_len(estatus); - if (len < sizeof(*estatus)) - goto err_read_block; - if (len > ghes->generic->error_block_length) - goto err_read_block; - if (cper_estatus_check_header(estatus)) - goto err_read_block; ghes_copy_tofrom_phys(estatus + 1, *buf_paddr + sizeof(*estatus), len - sizeof(*estatus), 1, fixmap_idx); - if (cper_estatus_check(estatus)) - goto err_read_block; - rc = 0; - -err_read_block: - if (rc) + if (cper_estatus_check(estatus)) { pr_warn_ratelimited(FW_WARN GHES_PFX "Failed to read error status block!\n"); + return -EIO; + } - return rc; + return 0; } static void ghes_clear_estatus(struct ghes *ghes, From e00a6e3392cb623b7ac4d61c5e1c1234b4520cad Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:55 +0000 Subject: [PATCH 22/29] ACPI / APEI: Split ghes_read_estatus() to allow a peek at the CPER length ghes_read_estatus() reads the record address, then the record's header, then performs some sanity checks before reading the records into the provided estatus buffer. To provide this estatus buffer the caller must know the size of the records in advance, or always provide a worst-case sized buffer as happens today for the non-NMI notifications. Add a function to peek at the record's header to find the size. This will let the NMI path allocate the right amount of memory before reading the records, instead of using the worst-case size, and having to copy the records. Split ghes_read_estatus() to create __ghes_peek_estatus() which returns the address and size of the CPER records. Signed-off-by: James Morse Changes since v7: * Grammar * concistent argument ordering Changes since v6: * Additional buf_addr = 0 error handling * Moved checking out of peek-estatus * Reworded an error message so we can tell them apart Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 4fb274bb209e..4c1d8e5e9ada 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -317,12 +317,12 @@ static int __ghes_check_estatus(struct ghes *ghes, return 0; } -static int ghes_read_estatus(struct ghes *ghes, - struct acpi_hest_generic_status *estatus, - u64 *buf_paddr, enum fixed_addresses fixmap_idx) +/* Read the CPER block, returning its address, and header in estatus. */ +static int __ghes_peek_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 *buf_paddr, enum fixed_addresses fixmap_idx) { struct acpi_hest_generic *g = ghes->generic; - u32 len; int rc; rc = apei_read(buf_paddr, &g->error_status_address); @@ -343,14 +343,14 @@ static int ghes_read_estatus(struct ghes *ghes, return -ENOENT; } - rc = __ghes_check_estatus(ghes, estatus); - if (rc) - return rc; + return __ghes_check_estatus(ghes, estatus); +} - len = cper_estatus_len(estatus); - ghes_copy_tofrom_phys(estatus + 1, - *buf_paddr + sizeof(*estatus), - len - sizeof(*estatus), 1, fixmap_idx); +static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus, + u64 buf_paddr, enum fixed_addresses fixmap_idx, + size_t buf_len) +{ + ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx); if (cper_estatus_check(estatus)) { pr_warn_ratelimited(FW_WARN GHES_PFX "Failed to read error status block!\n"); @@ -360,6 +360,24 @@ static int ghes_read_estatus(struct ghes *ghes, return 0; } +static int ghes_read_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 *buf_paddr, enum fixed_addresses fixmap_idx) +{ + int rc; + + rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx); + if (rc) + return rc; + + rc = __ghes_check_estatus(ghes, estatus); + if (rc) + return rc; + + return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx, + cper_estatus_len(estatus)); +} + static void ghes_clear_estatus(struct ghes *ghes, struct acpi_hest_generic_status *estatus, u64 buf_paddr, enum fixed_addresses fixmap_idx) From d9f608dc156487b55cb17c2ec591b06e53a6de64 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:56 +0000 Subject: [PATCH 23/29] ACPI / APEI: Only use queued estatus entry during in_nmi_queue_one_entry() Each struct ghes has an worst-case sized buffer for storing the estatus. If an error is being processed by ghes_proc() in process context this buffer will be in use. If the error source then triggers an NMI-like notification, the same buffer will be used by in_nmi_queue_one_entry() to stage the estatus data, before __process_error() copys it into a queued estatus entry. Merge __process_error()s work into in_nmi_queue_one_entry() so that the queued estatus entry is used from the beginning. Use the new ghes_peek_estatus() to know how much memory to allocate from the ghes_estatus_pool before reading the records. Reported-by: Borislav Petkov Signed-off-by: James Morse Reviewed-by: Borislav Petkov Change since v6: * Added a comment explaining the 'ack-error, then goto no_work'. * Added missing esatus-clearing, which is necessary after reading the GAS, Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 64 +++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 4c1d8e5e9ada..eb5bf2ced1fb 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -862,57 +862,67 @@ static void ghes_print_queued_estatus(void) } } -/* Save estatus for further processing in IRQ context */ -static void __process_error(struct ghes *ghes, - struct acpi_hest_generic_status *src_estatus) +static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, + enum fixed_addresses fixmap_idx) { - u32 len, node_len; + struct acpi_hest_generic_status *estatus, tmp_header; struct ghes_estatus_node *estatus_node; - struct acpi_hest_generic_status *estatus; + u32 len, node_len; + u64 buf_paddr; + int sev, rc; if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)) - return; + return -EOPNOTSUPP; - if (ghes_estatus_cached(src_estatus)) - return; + rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx); + if (rc) { + ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); + return rc; + } - len = cper_estatus_len(src_estatus); + rc = __ghes_check_estatus(ghes, &tmp_header); + if (rc) { + ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); + return rc; + } + + len = cper_estatus_len(&tmp_header); node_len = GHES_ESTATUS_NODE_LEN(len); - estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); if (!estatus_node) - return; + return -ENOMEM; estatus_node->ghes = ghes; estatus_node->generic = ghes->generic; estatus = GHES_ESTATUS_FROM_NODE(estatus_node); - memcpy(estatus, src_estatus, len); - llist_add(&estatus_node->llnode, &ghes_estatus_llist); -} -static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, - enum fixed_addresses fixmap_idx) -{ - struct acpi_hest_generic_status *estatus = ghes->estatus; - u64 buf_paddr; - int sev; - - if (ghes_read_estatus(ghes, estatus, &buf_paddr, fixmap_idx)) { + if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) { ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); - return -ENOENT; + rc = -ENOENT; + goto no_work; } sev = ghes_severity(estatus->error_severity); if (sev >= GHES_SEV_PANIC) { ghes_print_queued_estatus(); __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx); - } - __process_error(ghes, estatus); - ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); + ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); - return 0; + /* This error has been reported before, don't process it again. */ + if (ghes_estatus_cached(estatus)) + goto no_work; + + llist_add(&estatus_node->llnode, &ghes_estatus_llist); + + return rc; + +no_work: + gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, + node_len); + + return rc; } static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list, From b972d2eaf0c7021579755eec6b2b79e0f5bc7930 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:57 +0000 Subject: [PATCH 24/29] ACPI / APEI: Use separate fixmap pages for arm64 NMI-like notifications Now that ghes notification helpers provide the fixmap slots and take the lock themselves, multiple NMI-like notifications can be used on arm64. These should be named after their notification method as they can't all be called 'NMI'. x86's NOTIFY_NMI already is, change the SEA fixmap entry to be called FIX_APEI_GHES_SEA. Future patches can add support for FIX_APEI_GHES_SEI and FIX_APEI_GHES_SDEI_{NORMAL,CRITICAL}. Because all of ghes.c builds on both architectures, provide a constant for each fixmap entry that the architecture will never use. Signed-off-by: James Morse Signed-off-by: Rafael J. Wysocki --- arch/arm64/include/asm/fixmap.h | 2 +- drivers/acpi/apei/ghes.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index ec1e6d6fa14c..966dd4bb23f2 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -55,7 +55,7 @@ enum fixed_addresses { #ifdef CONFIG_ACPI_APEI_GHES /* Used for GHES mapping from assorted contexts */ FIX_APEI_GHES_IRQ, - FIX_APEI_GHES_NMI, + FIX_APEI_GHES_SEA, #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index eb5bf2ced1fb..99707d565dcc 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -957,7 +957,7 @@ int ghes_notify_sea(void) int rv; raw_spin_lock(&ghes_notify_lock_sea); - rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_NMI); + rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA); raw_spin_unlock(&ghes_notify_lock_sea); return rv; From f96935d3bc38a5f4b5188b6470a10e3fb8c3f0cc Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:49:01 +0000 Subject: [PATCH 25/29] firmware: arm_sdei: Add ACPI GHES registration helper APEI's Generic Hardware Error Source structures do not describe whether the SDEI event is shared or private, as this information is discoverable via the API. GHES needs to know whether an event is normal or critical to avoid sharing locks or fixmap entries, but GHES shouldn't have to know about the SDEI API. Add a helper to register the GHES using the appropriate normal or critical callback. Signed-off-by: James Morse Acked-by: Catalin Marinas Signed-off-by: Rafael J. Wysocki --- arch/arm64/include/asm/fixmap.h | 4 ++ drivers/firmware/arm_sdei.c | 68 +++++++++++++++++++++++++++++++++ include/linux/arm_sdei.h | 6 +++ 3 files changed, 78 insertions(+) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 966dd4bb23f2..f987b8a8f325 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -56,6 +56,10 @@ enum fixed_addresses { /* Used for GHES mapping from assorted contexts */ FIX_APEI_GHES_IRQ, FIX_APEI_GHES_SEA, +#ifdef CONFIG_ARM_SDE_INTERFACE + FIX_APEI_GHES_SDEI_NORMAL, + FIX_APEI_GHES_SDEI_CRITICAL, +#endif #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index c64c7da73829..e6376f985ef7 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -2,6 +2,7 @@ // Copyright (C) 2017 Arm Ltd. #define pr_fmt(fmt) "sdei: " fmt +#include #include #include #include @@ -887,6 +888,73 @@ static void sdei_smccc_hvc(unsigned long function_id, arm_smccc_hvc(function_id, arg0, arg1, arg2, arg3, arg4, 0, 0, res); } +int sdei_register_ghes(struct ghes *ghes, sdei_event_callback *normal_cb, + sdei_event_callback *critical_cb) +{ + int err; + u64 result; + u32 event_num; + sdei_event_callback *cb; + + if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) + return -EOPNOTSUPP; + + event_num = ghes->generic->notify.vector; + if (event_num == 0) { + /* + * Event 0 is reserved by the specification for + * SDEI_EVENT_SIGNAL. + */ + return -EINVAL; + } + + err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_PRIORITY, + &result); + if (err) + return err; + + if (result == SDEI_EVENT_PRIORITY_CRITICAL) + cb = critical_cb; + else + cb = normal_cb; + + err = sdei_event_register(event_num, cb, ghes); + if (!err) + err = sdei_event_enable(event_num); + + return err; +} + +int sdei_unregister_ghes(struct ghes *ghes) +{ + int i; + int err; + u32 event_num = ghes->generic->notify.vector; + + might_sleep(); + + if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) + return -EOPNOTSUPP; + + /* + * The event may be running on another CPU. Disable it + * to stop new events, then try to unregister a few times. + */ + err = sdei_event_disable(event_num); + if (err) + return err; + + for (i = 0; i < 3; i++) { + err = sdei_event_unregister(event_num); + if (err != -EINPROGRESS) + break; + + schedule(); + } + + return err; +} + static int sdei_get_conduit(struct platform_device *pdev) { const char *method; diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 942afbd544b7..393899192906 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -11,6 +11,7 @@ enum sdei_conduit_types { CONDUIT_HVC, }; +#include #include /* Arch code should override this to set the entry point from firmware... */ @@ -39,6 +40,11 @@ int sdei_event_unregister(u32 event_num); int sdei_event_enable(u32 event_num); int sdei_event_disable(u32 event_num); +/* GHES register/unregister helpers */ +int sdei_register_ghes(struct ghes *ghes, sdei_event_callback *normal_cb, + sdei_event_callback *critical_cb); +int sdei_unregister_ghes(struct ghes *ghes); + #ifdef CONFIG_ARM_SDE_INTERFACE /* For use by arch code when CPU hotplug notifiers are not appropriate. */ int sdei_mask_local_cpu(void); From f9f05395f384ee858520b6c65d7e3e436af20c53 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:49:02 +0000 Subject: [PATCH 26/29] ACPI / APEI: Add support for the SDEI GHES Notification type If the GHES notification type is SDEI, register the provided event using the SDEI-GHES helper. SDEI may be one of two types of event, normal and critical. Critical events can interrupt normal events, so these must have separate fixmap slots and locks in case both event types are in use. Signed-off-by: James Morse Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 85 ++++++++++++++++++++++++++++++++++++++++ include/linux/arm_sdei.h | 3 ++ 2 files changed, 88 insertions(+) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 99707d565dcc..0b5ae91fd0fb 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -25,6 +25,7 @@ * GNU General Public License for more details. */ +#include #include #include #include @@ -85,6 +86,15 @@ ((struct acpi_hest_generic_status *) \ ((struct ghes_estatus_node *)(estatus_node) + 1)) +/* + * NMI-like notifications vary by architecture, before the compiler can prune + * unused static functions it needs a value for these enums. + */ +#ifndef CONFIG_ARM_SDE_INTERFACE +#define FIX_APEI_GHES_SDEI_NORMAL __end_of_fixed_addresses +#define FIX_APEI_GHES_SDEI_CRITICAL __end_of_fixed_addresses +#endif + static inline bool is_hest_type_generic_v2(struct ghes *ghes) { return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; @@ -1040,6 +1050,63 @@ static void ghes_nmi_init_cxt(void) init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); } +static int __ghes_sdei_callback(struct ghes *ghes, + enum fixed_addresses fixmap_idx) +{ + if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) { + irq_work_queue(&ghes_proc_irq_work); + + return 0; + } + + return -ENOENT; +} + +static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs, + void *arg) +{ + static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal); + struct ghes *ghes = arg; + int err; + + raw_spin_lock(&ghes_notify_lock_sdei_normal); + err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL); + raw_spin_unlock(&ghes_notify_lock_sdei_normal); + + return err; +} + +static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs, + void *arg) +{ + static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical); + struct ghes *ghes = arg; + int err; + + raw_spin_lock(&ghes_notify_lock_sdei_critical); + err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL); + raw_spin_unlock(&ghes_notify_lock_sdei_critical); + + return err; +} + +static int apei_sdei_register_ghes(struct ghes *ghes) +{ + if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) + return -EOPNOTSUPP; + + return sdei_register_ghes(ghes, ghes_sdei_normal_callback, + ghes_sdei_critical_callback); +} + +static int apei_sdei_unregister_ghes(struct ghes *ghes) +{ + if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) + return -EOPNOTSUPP; + + return sdei_unregister_ghes(ghes); +} + static int ghes_probe(struct platform_device *ghes_dev) { struct acpi_hest_generic *generic; @@ -1075,6 +1142,13 @@ static int ghes_probe(struct platform_device *ghes_dev) goto err; } break; + case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: + if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) { + pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n", + generic->header.source_id); + goto err; + } + break; case ACPI_HEST_NOTIFY_LOCAL: pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", generic->header.source_id); @@ -1138,6 +1212,11 @@ static int ghes_probe(struct platform_device *ghes_dev) case ACPI_HEST_NOTIFY_NMI: ghes_nmi_add(ghes); break; + case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: + rc = apei_sdei_register_ghes(ghes); + if (rc) + goto err; + break; default: BUG(); } @@ -1163,6 +1242,7 @@ err: static int ghes_remove(struct platform_device *ghes_dev) { + int rc; struct ghes *ghes; struct acpi_hest_generic *generic; @@ -1195,6 +1275,11 @@ static int ghes_remove(struct platform_device *ghes_dev) case ACPI_HEST_NOTIFY_NMI: ghes_nmi_remove(ghes); break; + case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: + rc = apei_sdei_unregister_ghes(ghes); + if (rc) + return rc; + break; default: BUG(); break; diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 393899192906..3305ea7f9dc7 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -12,7 +12,10 @@ enum sdei_conduit_types { }; #include + +#ifdef CONFIG_ARM_SDE_INTERFACE #include +#endif /* Arch code should override this to set the entry point from firmware... */ #ifndef sdei_arch_get_entry_point From a2ed1ee91f4f2c59b71db12c16bff659dee9ee0b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Feb 2019 13:23:23 +0100 Subject: [PATCH 27/29] MAINTAINERS: Add James Morse to the list of APEI reviewers Add James to the list of reviewers of the firmware-assisted RAS glue. Signed-off-by: Borislav Petkov Acked-by: James Morse Link: https://lkml.kernel.org/r/20190205083836.21641-1-bp@alien8.de Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 4d04cebb4a71..7e524968c50f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -331,6 +331,7 @@ ACPI APEI M: "Rafael J. Wysocki" M: Len Brown L: linux-acpi@vger.kernel.org +R: James Morse R: Tony Luck R: Borislav Petkov F: drivers/acpi/apei/ From 1c0d9b1c31d19d3aa90296a184e4624545d7d1ee Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 28 Jan 2019 10:04:23 +0000 Subject: [PATCH 28/29] ACPI: APEI: Fix possible out-of-bounds access to BERT region Check that the length recorded in the generic error status block is within the region before checking the contents of the region itself. Otherwise it may result in an out-of-bounds access if the system firmware has generated a status block with an invalid length (larger than the mapped region). Also move the block_status check so that it only happens after the block has been verified to be within the mapped region. Signed-off-by: Ross Lagerwall Acked-by: Borislav Petkov Tested-by: Tyler Baicar Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/bert.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c index 12771fcf0417..0d948d0a41af 100644 --- a/drivers/acpi/apei/bert.c +++ b/drivers/acpi/apei/bert.c @@ -42,15 +42,7 @@ static void __init bert_print_all(struct acpi_bert_region *region, int remain = region_len; u32 estatus_len; - if (!estatus->block_status) - return; - - while (remain > sizeof(struct acpi_bert_region)) { - if (cper_estatus_check(estatus)) { - pr_err(FW_BUG "Invalid error record.\n"); - return; - } - + while (remain >= sizeof(struct acpi_bert_region)) { estatus_len = cper_estatus_len(estatus); if (remain < estatus_len) { pr_err(FW_BUG "Truncated status block (length: %u).\n", @@ -58,6 +50,15 @@ static void __init bert_print_all(struct acpi_bert_region *region, return; } + /* No more error records. */ + if (!estatus->block_status) + return; + + if (cper_estatus_check(estatus)) { + pr_err(FW_BUG "Invalid error record.\n"); + return; + } + pr_info_once("Error records from previous boot:\n"); cper_estatus_print(KERN_INFO HW_ERR, estatus); @@ -70,10 +71,6 @@ static void __init bert_print_all(struct acpi_bert_region *region, estatus->block_status = 0; estatus = (void *)estatus + estatus_len; - /* No more error records. */ - if (!estatus->block_status) - return; - remain -= estatus_len; } } From 45b14a4ffcc1e0b5caa246638f942cbe7eaea7ad Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 28 Jan 2019 10:04:24 +0000 Subject: [PATCH 29/29] efi: cper: Fix possible out-of-bounds access When checking a generic status block, we iterate over all the generic data blocks. The loop condition only checks that the start of the generic data block is valid (within estatus->data_length) but not the whole block. Because the size of data blocks (excluding error data) may vary depending on the revision and the revision is contained within the data block, ensure that enough of the current data block is valid before dereferencing any members otherwise an out-of-bounds access may occur if estatus->data_length is invalid. This relies on the fact that struct acpi_hest_generic_data_v300 is a superset of the earlier version. Also rework the other checks to avoid potential underflow. Signed-off-by: Ross Lagerwall Acked-by: Borislav Petkov Tested-by: Tyler Baicar Signed-off-by: Rafael J. Wysocki --- drivers/firmware/efi/cper.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index a7902fccdcfa..6090d25dce85 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -546,19 +546,24 @@ EXPORT_SYMBOL_GPL(cper_estatus_check_header); int cper_estatus_check(const struct acpi_hest_generic_status *estatus) { struct acpi_hest_generic_data *gdata; - unsigned int data_len, gedata_len; + unsigned int data_len, record_size; int rc; rc = cper_estatus_check_header(estatus); if (rc) return rc; + data_len = estatus->data_length; apei_estatus_for_each_section(estatus, gdata) { - gedata_len = acpi_hest_get_error_length(gdata); - if (gedata_len > data_len - acpi_hest_get_size(gdata)) + if (sizeof(struct acpi_hest_generic_data) > data_len) return -EINVAL; - data_len -= acpi_hest_get_record_size(gdata); + + record_size = acpi_hest_get_record_size(gdata); + if (record_size > data_len) + return -EINVAL; + + data_len -= record_size; } if (data_len) return -EINVAL;