From c996f3802006a585a6c3f8eaa73e375330efc0e7 Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Thu, 1 Mar 2018 16:13:36 +0100
Subject: [PATCH 1/7] x86/MSR: Move native_* variants to msr.h

... where they belong.

No functional change.

Signed-off-by: Borislav Petkov
Signed-off-by: Thomas Gleixner
Reviewed-by: Darren Kenny
Cc: kvm@vger.kernel.org
Link: https://lkml.kernel.org/r/20180301151336.12948-1-bp@alien8.de
---
 arch/x86/include/asm/microcode.h | 14 --------------
 arch/x86/include/asm/msr.h       | 14 ++++++++++++++
 arch/x86/kvm/svm.c               |  1 -
 arch/x86/kvm/vmx.c               |  1 -
 4 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 7fb1047d61c7..871714e2e4c6 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -6,20 +6,6 @@
 #include
 #include
 
-#define native_rdmsr(msr, val1, val2)			\
-do {							\
-	u64 __val = __rdmsr((msr));			\
-	(void)((val1) = (u32)__val);			\
-	(void)((val2) = (u32)(__val >> 32));		\
-} while (0)
-
-#define native_wrmsr(msr, low, high)			\
-	__wrmsr(msr, low, high)
-
-#define native_wrmsrl(msr, val)				\
-	__wrmsr((msr), (u32)((u64)(val)),		\
-		       (u32)((u64)(val) >> 32))
-
 struct ucode_patch {
 	struct list_head plist;
 	void *data;		/* Intel uses only this one */

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 30df295f6d94..77254c9c8f61 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -108,6 +108,20 @@ static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high)
 		     : : "c" (msr), "a"(low), "d" (high) : "memory");
 }
 
+#define native_rdmsr(msr, val1, val2)			\
+do {							\
+	u64 __val = __rdmsr((msr));			\
+	(void)((val1) = (u32)__val);			\
+	(void)((val2) = (u32)(__val >> 32));		\
+} while (0)
+
+#define native_wrmsr(msr, low, high)			\
+	__wrmsr(msr, low, high)
+
+#define native_wrmsrl(msr, val)				\
+	__wrmsr((msr), (u32)((u64)(val)),		\
+		       (u32)((u64)(val) >> 32))
+
 static inline unsigned long long native_read_msr(unsigned int msr)
 {
 	unsigned long long val;

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index be9c839e2c89..9d2043f94e29 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,7 +49,6 @@
 #include
 #include
 #include
-#include <asm/microcode.h>
 #include
 #include

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 051dab74e4e9..ee5ed44bc284 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,7 +51,6 @@
 #include
 #include
 #include
-#include <asm/microcode.h>
 #include
 
 #include "trace.h"

From 9558080935e0bd744d68a7e1747a7117310623cf Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Tue, 6 Mar 2018 10:49:13 +0100
Subject: [PATCH 2/7] x86/fault: Do not print IP in show_fault_oops()

... because __show_regs() already does that.
Signed-off-by: Borislav Petkov
Signed-off-by: Thomas Gleixner
Cc: Peter Zijlstra
Cc: Josh Poimboeuf
Cc: Linus Torvalds
Cc: Andy Lutomirski
Link: https://lkml.kernel.org/r/20180306094920.16917-3-bp@alien8.de
---
 arch/x86/mm/fault.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index c88573d90f3e..93505990df10 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -699,7 +699,6 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 		printk(KERN_CONT "paging request");
 
 	printk(KERN_CONT " at %px\n", (void *) address);
-	printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
 
 	dump_pagetable(address);
 }

From 16d1cb0bc43642a4d934631a73c5210ad2499e2f Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Tue, 6 Mar 2018 10:49:14 +0100
Subject: [PATCH 3/7] x86/dumpstack: Unify show_regs()

The 32-bit version uses KERN_EMERG, while commit b0f4c4b32c8e ("bugs,
x86: Fix printk levels for panic, softlockups and stack dumps") changed
the 64-bit version to KERN_DEFAULT. The justification in that commit,
that those messages do not belong in the terminal, holds true for
32-bit as well, so make it so.

Make code_bytes static, while at it.

Signed-off-by: Borislav Petkov
Signed-off-by: Thomas Gleixner
Cc: Peter Zijlstra
Cc: Josh Poimboeuf
Cc: Linus Torvalds
Cc: Andy Lutomirski
Link: https://lkml.kernel.org/r/20180306094920.16917-4-bp@alien8.de
---
 arch/x86/include/asm/stacktrace.h |  2 --
 arch/x86/kernel/dumpstack.c       | 49 ++++++++++++++++++++++++++++++-
 arch/x86/kernel/dumpstack_32.c    | 42 --------------------------
 arch/x86/kernel/dumpstack_64.c    | 42 --------------------------
 4 files changed, 48 insertions(+), 87 deletions(-)

diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index f73706878772..133d9425fced 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -87,8 +87,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			unsigned long *stack, char *log_lvl);
 
-extern unsigned int code_bytes;
-
 /* The form of the top of the frame on the stack */
 struct stack_frame {
 	struct stack_frame *next_frame;

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index a2d8a3908670..18fa9d74c182 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -24,7 +24,7 @@
 
 int panic_on_unrecovered_nmi;
 int panic_on_io_nmi;
-unsigned int code_bytes = 64;
+static unsigned int code_bytes = 64;
 static int die_counter;
 
 bool in_task_stack(unsigned long *stack, struct task_struct *task,
@@ -375,3 +375,50 @@ static int __init code_bytes_setup(char *s)
 
 	return 1;
 }
 __setup("code_bytes=", code_bytes_setup);
+
+void show_regs(struct pt_regs *regs)
+{
+	bool all = true;
+	int i;
+
+	show_regs_print_info(KERN_DEFAULT);
+
+	if (IS_ENABLED(CONFIG_X86_32))
+		all = !user_mode(regs);
+
+	__show_regs(regs, all);
+
+	/*
+	 * When in-kernel, we also print out the stack and code at the
+	 * time of the fault..
+ */ + if (!user_mode(regs)) { + unsigned int code_prologue = code_bytes * 43 / 64; + unsigned int code_len = code_bytes; + unsigned char c; + u8 *ip; + + show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT); + + printk(KERN_DEFAULT "Code: "); + + ip = (u8 *)regs->ip - code_prologue; + if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { + /* try starting at IP */ + ip = (u8 *)regs->ip; + code_len = code_len - code_prologue + 1; + } + for (i = 0; i < code_len; i++, ip++) { + if (ip < (u8 *)PAGE_OFFSET || + probe_kernel_address(ip, c)) { + pr_cont(" Bad RIP value."); + break; + } + if (ip == (u8 *)regs->ip) + pr_cont("<%02x> ", c); + else + pr_cont("%02x ", c); + } + } + pr_cont("\n"); +} diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 04170f63e3a1..cd53f3030e40 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -127,45 +127,3 @@ unknown: info->type = STACK_TYPE_UNKNOWN; return -EINVAL; } - -void show_regs(struct pt_regs *regs) -{ - int i; - - show_regs_print_info(KERN_EMERG); - __show_regs(regs, !user_mode(regs)); - - /* - * When in-kernel, we also print out the stack and code at the - * time of the fault.. - */ - if (!user_mode(regs)) { - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; - unsigned char c; - u8 *ip; - - show_trace_log_lvl(current, regs, NULL, KERN_EMERG); - - pr_emerg("Code:"); - - ip = (u8 *)regs->ip - code_prologue; - if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - /* try starting at IP */ - ip = (u8 *)regs->ip; - code_len = code_len - code_prologue + 1; - } - for (i = 0; i < code_len; i++, ip++) { - if (ip < (u8 *)PAGE_OFFSET || - probe_kernel_address(ip, c)) { - pr_cont(" Bad EIP value."); - break; - } - if (ip == (u8 *)regs->ip) - pr_cont(" <%02x>", c); - else - pr_cont(" %02x", c); - } - } - pr_cont("\n"); -} diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 563e28d14f2c..5cdb9e84da57 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -149,45 +149,3 @@ unknown: info->type = STACK_TYPE_UNKNOWN; return -EINVAL; } - -void show_regs(struct pt_regs *regs) -{ - int i; - - show_regs_print_info(KERN_DEFAULT); - __show_regs(regs, 1); - - /* - * When in-kernel, we also print out the stack and code at the - * time of the fault.. - */ - if (!user_mode(regs)) { - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; - unsigned char c; - u8 *ip; - - show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT); - - printk(KERN_DEFAULT "Code: "); - - ip = (u8 *)regs->ip - code_prologue; - if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - /* try starting at IP */ - ip = (u8 *)regs->ip; - code_len = code_len - code_prologue + 1; - } - for (i = 0; i < code_len; i++, ip++) { - if (ip < (u8 *)PAGE_OFFSET || - probe_kernel_address(ip, c)) { - pr_cont(" Bad RIP value."); - break; - } - if (ip == (u8 *)regs->ip) - pr_cont("<%02x> ", c); - else - pr_cont("%02x ", c); - } - } - pr_cont("\n"); -} From 13cc36d76bc4f5a9801ae32630bc8240ba0cc522 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 9 Mar 2018 10:42:50 -0800 Subject: [PATCH 4/7] x86/rtc: Stop using deprecated functions rtc_time_to_tm() and rtc_tm_to_time() are deprecated because they rely on 32bits variables and that will make rtc break in y2038/2016. Use the proper y2038 safe functions. 
Signed-off-by: Benjamin Gaignard
Signed-off-by: John Stultz
Signed-off-by: Thomas Gleixner
Cc: Prarit Bhargava
Cc: Arnd Bergmann
Cc: Richard Cochran
Cc: Stephen Boyd
Cc: Miroslav Lichvar
Cc: Alexandre Belloni
Link: https://lkml.kernel.org/r/1520620971-9567-5-git-send-email-john.stultz@linaro.org
---
 arch/x86/kernel/rtc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 69ac9cb9cac6..f7b82ed7b5b5 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -41,11 +41,11 @@ EXPORT_SYMBOL(rtc_lock);
  */
 int mach_set_rtc_mmss(const struct timespec *now)
 {
-	unsigned long nowtime = now->tv_sec;
+	unsigned long long nowtime = now->tv_sec;
 	struct rtc_time tm;
 	int retval = 0;
 
-	rtc_time_to_tm(nowtime, &tm);
+	rtc_time64_to_tm(nowtime, &tm);
 	if (!rtc_valid_tm(&tm)) {
 		retval = mc146818_set_time(&tm);
 		if (retval)
@@ -53,7 +53,7 @@ int mach_set_rtc_mmss(const struct timespec *now)
 			__func__, retval);
 	} else {
 		printk(KERN_ERR
-		       "%s: Invalid RTC value: write of %lx to RTC failed\n",
+		       "%s: Invalid RTC value: write of %llx to RTC failed\n",
 			__func__, nowtime);
 		retval = -EINVAL;
 	}

From 07cde313b2d21f728cec2836db7cdb55476f7a26 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 23 Mar 2018 14:58:17 -0700
Subject: [PATCH 5/7] x86/msr: Allow rdmsr_safe_on_cpu() to schedule

High latencies can be observed, caused by a daemon periodically reading
various MSRs on all CPUs. On KASAN-enabled kernels, ~10ms latencies can
be observed simply by reading one MSR. Even without KASAN, sending an
IPI to a CPU which is in a deep sleep state or in a long hard-IRQ-disabled
section, and waiting for the answer, can consume hundreds of
microseconds.

All usage sites are in preemptible context; convert rdmsr_safe_on_cpu()
to use a completion instead of busy polling.

Overall daemon CPU usage was reduced by 35%, and latencies caused by
msr_read() disappeared.

Signed-off-by: Eric Dumazet
Signed-off-by: Thomas Gleixner
Acked-by: Ingo Molnar
Cc: Hugh Dickins
Cc: Borislav Petkov
Cc: Eric Dumazet
Link: https://lkml.kernel.org/r/20180323215818.127774-1-edumazet@google.com
---
 arch/x86/lib/msr-smp.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
index 693cce0be82d..761ba062afda 100644
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include <linux/completion.h>
 #include
 
 static void __rdmsr_on_cpu(void *info)
@@ -143,13 +144,19 @@ void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
 }
 EXPORT_SYMBOL(wrmsr_on_cpus);
 
+struct msr_info_completion {
+	struct msr_info		msr;
+	struct completion	done;
+};
+
 /* These "safe" variants are slower and should be used when the target MSR
    may not actually exist. */
 static void __rdmsr_safe_on_cpu(void *info)
 {
-	struct msr_info *rv = info;
+	struct msr_info_completion *rv = info;
 
-	rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
+	rv->msr.err = rdmsr_safe(rv->msr.msr_no, &rv->msr.reg.l, &rv->msr.reg.h);
+	complete(&rv->done);
 }
 
 static void __wrmsr_safe_on_cpu(void *info)
@@ -161,17 +168,26 @@ static void __wrmsr_safe_on_cpu(void *info)
 
 int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
+	struct msr_info_completion rv;
+	call_single_data_t csd = {
+		.func = __rdmsr_safe_on_cpu,
+		.info = &rv,
+	};
 	int err;
-	struct msr_info rv;
 
 	memset(&rv, 0, sizeof(rv));
+	init_completion(&rv.done);
+	rv.msr.msr_no = msr_no;
 
-	rv.msr_no = msr_no;
-	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
-	*l = rv.reg.l;
-	*h = rv.reg.h;
+	err = smp_call_function_single_async(cpu, &csd);
+	if (!err) {
+		wait_for_completion(&rv.done);
+		err = rv.msr.err;
+	}
+	*l = rv.msr.reg.l;
+	*h = rv.msr.reg.h;
 
-	return err ? err : rv.err;
+	return err;
 }
 EXPORT_SYMBOL(rdmsr_safe_on_cpu);

From 67bbd7a8d6bcdc44cc27105ae8c374e9176ceaf1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 23 Mar 2018 14:58:18 -0700
Subject: [PATCH 6/7] x86/cpuid: Allow cpuid_read() to schedule

High latencies can be observed, caused by a daemon periodically reading
CPUID on all CPUs. On KASAN-enabled kernels, ~10ms latencies can be
observed. Even without KASAN, sending an IPI to a CPU which is in a deep
sleep state or in a long hard-IRQ-disabled section, and waiting for the
answer, can consume hundreds of microseconds.

cpuid_read() is invoked in preemptible context, so it can sleep instead
of busy-waiting. Switching to smp_call_function_single_async() and a
completion allows rescheduling and reduces CPU usage and latencies.
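[This patch follows the same conversion recipe as rdmsr_safe_on_cpu() in
the previous one, so a generic sketch of the pattern may help. The names
below (remote_work, do_remote_work, run_on_cpu) are illustrative and not
taken from the kernel; only the APIs are the real ones used by the two
patches:

#include <linux/completion.h>
#include <linux/smp.h>

struct remote_work {
	int result;
	struct completion done;
};

/* Runs on the target CPU in IPI (hard IRQ) context. */
static void do_remote_work(void *info)
{
	struct remote_work *rw = info;

	rw->result = 0;		/* do the actual per-CPU work here */
	complete(&rw->done);	/* wake up the sleeping caller */
}

/* Must be called from preemptible context, since it may sleep. */
static int run_on_cpu(unsigned int cpu)
{
	struct remote_work rw;
	call_single_data_t csd = {
		.func = do_remote_work,
		.info = &rw,
	};
	int err;

	init_completion(&rw.done);

	/* Fire the IPI without spinning on the remote CPU's answer... */
	err = smp_call_function_single_async(cpu, &csd);
	if (!err) {
		/* ...and sleep until it completes, allowing rescheduling. */
		wait_for_completion(&rw.done);
		err = rw.result;
	}
	return err;
}

Compared with smp_call_function_single(cpu, func, info, 1), which spins
until the remote CPU answers, the caller here simply sleeps.]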
Signed-off-by: Eric Dumazet
Signed-off-by: Thomas Gleixner
Acked-by: Ingo Molnar
Cc: Hugh Dickins
Cc: Borislav Petkov
Cc: Eric Dumazet
Link: https://lkml.kernel.org/r/20180323215818.127774-2-edumazet@google.com
---
 arch/x86/kernel/cpuid.c | 34 ++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 0931a105ffe1..1d300f96df4b 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -40,6 +40,7 @@
 #include
 #include
 #include
+#include <linux/completion.h>
 #include
 #include
 
@@ -47,19 +48,27 @@
 static struct class *cpuid_class;
 static enum cpuhp_state cpuhp_cpuid_state;
 
+struct cpuid_regs_done {
+	struct cpuid_regs regs;
+	struct completion done;
+};
+
 static void cpuid_smp_cpuid(void *cmd_block)
 {
-	struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
+	struct cpuid_regs_done *cmd = cmd_block;
 
-	cpuid_count(cmd->eax, cmd->ecx,
-		    &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx);
+	cpuid_count(cmd->regs.eax, cmd->regs.ecx,
+		    &cmd->regs.eax, &cmd->regs.ebx,
+		    &cmd->regs.ecx, &cmd->regs.edx);
+
+	complete(&cmd->done);
 }
 
 static ssize_t cpuid_read(struct file *file, char __user *buf,
 			  size_t count, loff_t *ppos)
 {
 	char __user *tmp = buf;
-	struct cpuid_regs cmd;
+	struct cpuid_regs_done cmd;
 	int cpu = iminor(file_inode(file));
 	u64 pos = *ppos;
 	ssize_t bytes = 0;
@@ -68,19 +77,28 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
 	if (count % 16)
 		return -EINVAL;	/* Invalid chunk size */
 
+	init_completion(&cmd.done);
 	for (; count; count -= 16) {
-		cmd.eax = pos;
-		cmd.ecx = pos >> 32;
-		err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
+		call_single_data_t csd = {
+			.func = cpuid_smp_cpuid,
+			.info = &cmd,
+		};
+
+		cmd.regs.eax = pos;
+		cmd.regs.ecx = pos >> 32;
+
+		err = smp_call_function_single_async(cpu, &csd);
 		if (err)
 			break;
-		if (copy_to_user(tmp, &cmd, 16)) {
+		wait_for_completion(&cmd.done);
+		if (copy_to_user(tmp, &cmd.regs, 16)) {
 			err = -EFAULT;
 			break;
 		}
 		tmp += 16;
 		bytes += 16;
 		*ppos = ++pos;
+		reinit_completion(&cmd.done);
 	}
 
 	return bytes ? bytes : err;

From 9b9a51354cae933f5640b5bb73bbcd32f989122f Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 27 Mar 2018 20:22:33 -0700
Subject: [PATCH 7/7] x86/msr: Make rdmsrl_safe_on_cpu() scheduling safe as
 well

When changing rdmsr_safe_on_cpu() to schedule, it was missed that
__rdmsr_safe_on_cpu() was also used by rdmsrl_safe_on_cpu().

Make rdmsrl_safe_on_cpu() a wrapper instead of copy/pasting the code
which was added for the completion handling.

Fixes: 07cde313b2d2 ("x86/msr: Allow rdmsr_safe_on_cpu() to schedule")
Reported-by: kbuild test robot
Signed-off-by: Eric Dumazet
Signed-off-by: Thomas Gleixner
Cc: Borislav Petkov
Cc: Eric Dumazet
Cc: "H. Peter Anvin"
Link: https://lkml.kernel.org/r/20180328032233.153055-1-edumazet@google.com
---
 arch/x86/lib/msr-smp.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
index 761ba062afda..fee8b9c0520c 100644
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -225,16 +225,13 @@ EXPORT_SYMBOL(wrmsrl_safe_on_cpu);
 
 int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
 {
+	u32 low, high;
 	int err;
-	struct msr_info rv;
 
-	memset(&rv, 0, sizeof(rv));
+	err = rdmsr_safe_on_cpu(cpu, msr_no, &low, &high);
+	*q = (u64)high << 32 | low;
 
-	rv.msr_no = msr_no;
-	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
-	*q = rv.reg.q;
-
-	return err ? err : rv.err;
+	return err;
 }
 EXPORT_SYMBOL(rdmsrl_safe_on_cpu);
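[As a closing note on the series: patch 1's native_rdmsr()/native_wrmsrl()
macros and the rdmsrl_safe_on_cpu() wrapper above are two sides of the
same 32/64-bit split, because an MSR value travels as two u32 halves in
EDX:EAX. A standalone sketch of that arithmetic, illustrative only and
not kernel code:

#include <stdint.h>
#include <stdio.h>

/* Split a 64-bit MSR value into EAX/EDX halves, cf. native_wrmsrl(). */
static void split64(uint64_t val, uint32_t *low, uint32_t *high)
{
	*low  = (uint32_t)val;
	*high = (uint32_t)(val >> 32);
}

/* Recombine the halves, cf. "*q = (u64)high << 32 | low" above. */
static uint64_t join64(uint32_t low, uint32_t high)
{
	return (uint64_t)high << 32 | low;
}

int main(void)
{
	uint32_t l, h;
	uint64_t v = 0x123456789abcdef0ULL;

	split64(v, &l, &h);
	printf("high=%#x low=%#x rejoined=%#llx\n",
	       h, l, (unsigned long long)join64(l, h));
	return 0;
}
]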