From 9bc284ca0b6a1fdbb71fc5b6a0e1b65d743cf2ad Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 25 Jan 2021 09:17:48 +0100 Subject: [PATCH 1/6] printk: rectify kernel-doc for prb_rec_init_wr() The command 'find ./kernel/printk/ | xargs ./scripts/kernel-doc -none' reported a mismatch with the kernel-doc of prb_rec_init_wr(). Rectify the kernel-doc, such that no issues remain for ./kernel/printk/. Signed-off-by: Lukas Bulwahn Reviewed-by: John Ogness Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210125081748.19903-1-lukas.bulwahn@gmail.com --- kernel/printk/printk_ringbuffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h index 5dc9d022db07..73cc80e01cef 100644 --- a/kernel/printk/printk_ringbuffer.h +++ b/kernel/printk/printk_ringbuffer.h @@ -287,7 +287,7 @@ _DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0]) /* Writer Interface */ /** - * prb_rec_init_wd() - Initialize a buffer for writing records. + * prb_rec_init_wr() - Initialize a buffer for writing records. * * @r: The record to initialize. * @text_buf_size: The needed text buffer size. From 8a8109f303e25a27f92c1d8edd67d7cbbc60a4eb Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Wed, 10 Feb 2021 11:48:23 +0800 Subject: [PATCH 2/6] printk: fix deadlock when kernel panic printk_safe_flush_on_panic() caused the following deadlock on our server: CPU0: CPU1: panic rcu_dump_cpu_stacks kdump_nmi_shootdown_cpus nmi_trigger_cpumask_backtrace register_nmi_handler(crash_nmi_callback) printk_safe_flush __printk_safe_flush raw_spin_lock_irqsave(&read_lock) // send NMI to other processors apic_send_IPI_allbutself(NMI_VECTOR) // NMI interrupt, dead loop crash_nmi_callback printk_safe_flush_on_panic printk_safe_flush __printk_safe_flush // deadlock raw_spin_lock_irqsave(&read_lock) DEADLOCK: read_lock is taken on CPU1 and will never get released. It happens when panic() stops a CPU by NMI while it has been in the middle of printk_safe_flush(). Handle the lock the same way as logbuf_lock. The printk_safe buffers are flushed only when both locks can be safely taken. It can avoid the deadlock _in this particular case_ at expense of losing contents of printk_safe buffers. Note: It would actually be safe to re-init the locks when all CPUs were stopped by NMI. But it would require passing this information from arch-specific code. It is not worth the complexity. Especially because logbuf_lock and printk_safe buffers have been obsoleted by the lockless ring buffer. Fixes: cf9b1106c81c ("printk/nmi: flush NMI messages on the system panic") Signed-off-by: Muchun Song Reviewed-by: Petr Mladek Cc: Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210210034823.64867-1-songmuchun@bytedance.com --- kernel/printk/printk_safe.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index a0e6f746de6c..2e9e3ed7d63e 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -45,6 +45,8 @@ struct printk_safe_seq_buf { static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); static DEFINE_PER_CPU(int, printk_context); +static DEFINE_RAW_SPINLOCK(safe_read_lock); + #ifdef CONFIG_PRINTK_NMI static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); #endif @@ -180,8 +182,6 @@ static void report_message_lost(struct printk_safe_seq_buf *s) */ static void __printk_safe_flush(struct irq_work *work) { - static raw_spinlock_t read_lock = - __RAW_SPIN_LOCK_INITIALIZER(read_lock); struct printk_safe_seq_buf *s = container_of(work, struct printk_safe_seq_buf, work); unsigned long flags; @@ -195,7 +195,7 @@ static void __printk_safe_flush(struct irq_work *work) * different CPUs. This is especially important when printing * a backtrace. */ - raw_spin_lock_irqsave(&read_lock, flags); + raw_spin_lock_irqsave(&safe_read_lock, flags); i = 0; more: @@ -232,7 +232,7 @@ more: out: report_message_lost(s); - raw_spin_unlock_irqrestore(&read_lock, flags); + raw_spin_unlock_irqrestore(&safe_read_lock, flags); } /** @@ -278,6 +278,14 @@ void printk_safe_flush_on_panic(void) raw_spin_lock_init(&logbuf_lock); } + if (raw_spin_is_locked(&safe_read_lock)) { + if (num_online_cpus() > 1) + return; + + debug_locks_off(); + raw_spin_lock_init(&safe_read_lock); + } + printk_safe_flush(); } From 13791c80b0cdf54d92fc54221cdf490683b109de Mon Sep 17 00:00:00 2001 From: John Ogness Date: Thu, 11 Feb 2021 18:37:52 +0106 Subject: [PATCH 3/6] printk: avoid prb_first_valid_seq() where possible If message sizes average larger than expected (more than 32 characters), the data_ring will wrap before the desc_ring. Once the data_ring wraps, it will start invalidating descriptors. These invalid descriptors hang around until they are eventually recycled when the desc_ring wraps. Readers do not care about invalid descriptors, but they still need to iterate past them. If the average message size is much larger than 32 characters, then there will be many invalid descriptors preceding the valid descriptors. The function prb_first_valid_seq() always begins at the oldest descriptor and searches for the first valid descriptor. This can be rather expensive for the above scenario. And, in fact, because of its heavy usage in /dev/kmsg, there have been reports of long delays and even RCU stalls. For code that does not need to search from the oldest record, replace prb_first_valid_seq() usage with prb_read_valid_*() functions, which provide a start sequence number to search from. Fixes: 896fbe20b4e2333fb55 ("printk: use the lockless ringbuffer") Reported-by: kernel test robot Reported-by: J. Avila Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210211173152.1629-1-john.ogness@linutronix.de --- kernel/printk/printk.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c55cd1820689..c7239d169bbe 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -736,9 +736,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, logbuf_lock_irq(); } - if (user->seq < prb_first_valid_seq(prb)) { + if (r->info->seq != user->seq) { /* our last seen message is gone, return error and reset */ - user->seq = prb_first_valid_seq(prb); + user->seq = r->info->seq; ret = -EPIPE; logbuf_unlock_irq(); goto out; @@ -813,6 +813,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) static __poll_t devkmsg_poll(struct file *file, poll_table *wait) { struct devkmsg_user *user = file->private_data; + struct printk_info info; __poll_t ret = 0; if (!user) @@ -821,9 +822,9 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); logbuf_lock_irq(); - if (prb_read_valid(prb, user->seq, NULL)) { + if (prb_read_valid_info(prb, user->seq, &info, NULL)) { /* return error when data has vanished underneath us */ - if (user->seq < prb_first_valid_seq(prb)) + if (info.seq != user->seq) ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; else ret = EPOLLIN|EPOLLRDNORM; @@ -1560,6 +1561,7 @@ static void syslog_clear(void) int do_syslog(int type, char __user *buf, int len, int source) { + struct printk_info info; bool clear = false; static int saved_console_loglevel = LOGLEVEL_DEFAULT; int error; @@ -1630,9 +1632,14 @@ int do_syslog(int type, char __user *buf, int len, int source) /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: logbuf_lock_irq(); - if (syslog_seq < prb_first_valid_seq(prb)) { + if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { + /* No unread messages. */ + logbuf_unlock_irq(); + return 0; + } + if (info.seq != syslog_seq) { /* messages are gone, move to first one */ - syslog_seq = prb_first_valid_seq(prb); + syslog_seq = info.seq; syslog_partial = 0; } if (source == SYSLOG_FROM_PROC) { @@ -1644,7 +1651,6 @@ int do_syslog(int type, char __user *buf, int len, int source) error = prb_next_seq(prb) - syslog_seq; } else { bool time = syslog_partial ? syslog_time : printk_time; - struct printk_info info; unsigned int line_count; u64 seq; @@ -3425,9 +3431,11 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, goto out; logbuf_lock_irqsave(flags); - if (dumper->cur_seq < prb_first_valid_seq(prb)) { - /* messages are gone, move to first available one */ - dumper->cur_seq = prb_first_valid_seq(prb); + if (prb_read_valid_info(prb, dumper->cur_seq, &info, NULL)) { + if (info.seq != dumper->cur_seq) { + /* messages are gone, move to first available one */ + dumper->cur_seq = info.seq; + } } /* last entry */ From 4e89a78779647ca7ee2967551c599633fe9d3647 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Sun, 14 Feb 2021 10:13:46 -0600 Subject: [PATCH 4/6] lib: use KSTM_MODULE_GLOBALS macro in kselftest drivers Instead of defining the total/failed test counters manually, test drivers that are clients of kselftest should use the macro created for this purpose. Signed-off-by: Timur Tabi Reviewed-by: Petr Mladek Acked-by: Marco Elver Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210214161348.369023-2-timur@kernel.org --- lib/test_bitmap.c | 3 +-- lib/test_printf.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 4425a1dd4ef1..0ea0e8258f14 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -16,8 +16,7 @@ #include "../tools/testing/selftests/kselftest_module.h" -static unsigned total_tests __initdata; -static unsigned failed_tests __initdata; +KSTM_MODULE_GLOBALS(); static char pbl_buffer[PAGE_SIZE] __initdata; diff --git a/lib/test_printf.c b/lib/test_printf.c index 7ac87f18a10f..ad2bcfa8caa1 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -30,8 +30,8 @@ #define PAD_SIZE 16 #define FILL_CHAR '$' -static unsigned total_tests __initdata; -static unsigned failed_tests __initdata; +KSTM_MODULE_GLOBALS(); + static char *test_buffer __initdata; static char *alloced_buffer __initdata; From d9d4de2309cd1721421c6488f1bb5744d2c83a39 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Sun, 14 Feb 2021 10:13:47 -0600 Subject: [PATCH 5/6] kselftest: add support for skipped tests Update the kselftest framework to allow client drivers to specify that some tests were skipped. Signed-off-by: Timur Tabi Reviewed-by: Petr Mladek Tested-by: Petr Mladek Acked-by: Marco Elver Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210214161348.369023-3-timur@kernel.org --- tools/testing/selftests/kselftest_module.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kselftest_module.h b/tools/testing/selftests/kselftest_module.h index e8eafaf0941a..e2ea41de3f35 100644 --- a/tools/testing/selftests/kselftest_module.h +++ b/tools/testing/selftests/kselftest_module.h @@ -11,7 +11,8 @@ #define KSTM_MODULE_GLOBALS() \ static unsigned int total_tests __initdata; \ -static unsigned int failed_tests __initdata +static unsigned int failed_tests __initdata; \ +static unsigned int skipped_tests __initdata #define KSTM_CHECK_ZERO(x) do { \ total_tests++; \ @@ -21,11 +22,16 @@ static unsigned int failed_tests __initdata } \ } while (0) -static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests) +static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests, + unsigned int skipped_tests) { - if (failed_tests == 0) - pr_info("all %u tests passed\n", total_tests); - else + if (failed_tests == 0) { + if (skipped_tests) { + pr_info("skipped %u tests\n", skipped_tests); + pr_info("remaining %u tests passed\n", total_tests); + } else + pr_info("all %u tests passed\n", total_tests); + } else pr_warn("failed %u out of %u tests\n", failed_tests, total_tests); return failed_tests ? -EINVAL : 0; @@ -36,7 +42,7 @@ static int __init __module##_init(void) \ { \ pr_info("loaded.\n"); \ selftest(); \ - return kstm_report(total_tests, failed_tests); \ + return kstm_report(total_tests, failed_tests, skipped_tests); \ } \ static void __exit __module##_exit(void) \ { \ From 5ead723a20e0447bc7db33dc3070b420e5f80aa6 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Sun, 14 Feb 2021 10:13:48 -0600 Subject: [PATCH 6/6] lib/vsprintf: no_hash_pointers prints all addresses as unhashed If the no_hash_pointers command line parameter is set, then printk("%p") will print pointers as unhashed, which is useful for debugging purposes. This change applies to any function that uses vsprintf, such as print_hex_dump() and seq_buf_printf(). A large warning message is displayed if this option is enabled. Unhashed pointers expose kernel addresses, which can be a security risk. Also update test_printf to skip the hashed pointer tests if the command-line option is set. Signed-off-by: Timur Tabi Acked-by: Petr Mladek Acked-by: Randy Dunlap Acked-by: Sergey Senozhatsky Acked-by: Vlastimil Babka Acked-by: Marco Elver Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210214161348.369023-4-timur@kernel.org --- .../admin-guide/kernel-parameters.txt | 15 ++++++++ lib/test_printf.c | 8 +++++ lib/vsprintf.c | 36 +++++++++++++++++-- 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c722ec19cd00..c549bc789108 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3281,6 +3281,21 @@ in certain environments such as networked servers or real-time systems. + no_hash_pointers + Force pointers printed to the console or buffers to be + unhashed. By default, when a pointer is printed via %p + format string, that pointer is "hashed", i.e. obscured + by hashing the pointer value. This is a security feature + that hides actual kernel addresses from unprivileged + users, but it also makes debugging the kernel more + difficult since unequal pointers can no longer be + compared. However, if this command-line option is + specified, then all normal pointers will have their true + value printed. Pointers printed via %pK may still be + hashed. This option should only be specified when + debugging the kernel. Please do not use on production + kernels. + nohibernate [HIBERNATION] Disable hibernation and resume. nohz= [KNL] Boottime enable/disable dynamic ticks diff --git a/lib/test_printf.c b/lib/test_printf.c index ad2bcfa8caa1..a6755798e9e6 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -35,6 +35,8 @@ KSTM_MODULE_GLOBALS(); static char *test_buffer __initdata; static char *alloced_buffer __initdata; +extern bool no_hash_pointers; + static int __printf(4, 0) __init do_test(int bufsize, const char *expect, int elen, const char *fmt, va_list ap) @@ -301,6 +303,12 @@ plain(void) { int err; + if (no_hash_pointers) { + pr_warn("skipping plain 'p' tests"); + skipped_tests += 2; + return; + } + err = plain_hash(); if (err) { pr_warn("plain 'p' does not appear to be hashed\n"); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3b53c73580c5..41ddc353ebb8 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2090,6 +2090,32 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, return widen_string(buf, buf - buf_start, end, spec); } +/* Disable pointer hashing if requested */ +bool no_hash_pointers __ro_after_init; +EXPORT_SYMBOL_GPL(no_hash_pointers); + +static int __init no_hash_pointers_enable(char *str) +{ + no_hash_pointers = true; + + pr_warn("**********************************************************\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("** **\n"); + pr_warn("** This system shows unhashed kernel memory addresses **\n"); + pr_warn("** via the console, logs, and other interfaces. This **\n"); + pr_warn("** might reduce the security of your system. **\n"); + pr_warn("** **\n"); + pr_warn("** If you see this message and you are not debugging **\n"); + pr_warn("** the kernel, report this immediately to your system **\n"); + pr_warn("** administrator! **\n"); + pr_warn("** **\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("**********************************************************\n"); + + return 0; +} +early_param("no_hash_pointers", no_hash_pointers_enable); + /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -2297,8 +2323,14 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, } } - /* default is to _not_ leak addresses, hash before printing */ - return ptr_to_id(buf, end, ptr, spec); + /* + * default is to _not_ leak addresses, so hash before printing, + * unless no_hash_pointers is specified on the command line. + */ + if (unlikely(no_hash_pointers)) + return pointer_string(buf, end, ptr, spec); + else + return ptr_to_id(buf, end, ptr, spec); } /*