1
0
Fork 0
Commit Graph

348 Commits (6396bb221514d2876fd6dc0aa2a1f240d99b37bb)

Author SHA1 Message Date
Kees Cook 6396bb2215 treewide: kzalloc() -> kcalloc()
The kzalloc() function has a 2-factor argument form, kcalloc(). This
patch replaces cases of:

        kzalloc(a * b, gfp)

with:
        kcalloc(a * b, gfp)

as well as handling cases of:

        kzalloc(a * b * c, gfp)

with:

        kzalloc(array3_size(a, b, c), gfp)

as it's slightly less ugly than:

        kzalloc_array(array_size(a, b), c, gfp)

This does, however, attempt to ignore constant size factors like:

        kzalloc(4 * 1024, gfp)

though any constants defined via macros get caught up in the conversion.

Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.

The Coccinelle script used for this was:

// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@

(
  kzalloc(
-	(sizeof(TYPE)) * E
+	sizeof(TYPE) * E
  , ...)
|
  kzalloc(
-	(sizeof(THING)) * E
+	sizeof(THING) * E
  , ...)
)

// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@

(
  kzalloc(
-	sizeof(u8) * (COUNT)
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(__u8) * (COUNT)
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(char) * (COUNT)
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(unsigned char) * (COUNT)
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(u8) * COUNT
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(__u8) * COUNT
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(char) * COUNT
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(unsigned char) * COUNT
+	COUNT
  , ...)
)

// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@

(
- kzalloc
+ kcalloc
  (
-	sizeof(TYPE) * (COUNT_ID)
+	COUNT_ID, sizeof(TYPE)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(TYPE) * COUNT_ID
+	COUNT_ID, sizeof(TYPE)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(TYPE) * (COUNT_CONST)
+	COUNT_CONST, sizeof(TYPE)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(TYPE) * COUNT_CONST
+	COUNT_CONST, sizeof(TYPE)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(THING) * (COUNT_ID)
+	COUNT_ID, sizeof(THING)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(THING) * COUNT_ID
+	COUNT_ID, sizeof(THING)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(THING) * (COUNT_CONST)
+	COUNT_CONST, sizeof(THING)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(THING) * COUNT_CONST
+	COUNT_CONST, sizeof(THING)
  , ...)
)

// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@

- kzalloc
+ kcalloc
  (
-	SIZE * COUNT
+	COUNT, SIZE
  , ...)

// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@

(
  kzalloc(
-	sizeof(TYPE) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kzalloc(
-	sizeof(TYPE) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kzalloc(
-	sizeof(TYPE) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kzalloc(
-	sizeof(TYPE) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kzalloc(
-	sizeof(THING) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kzalloc(
-	sizeof(THING) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kzalloc(
-	sizeof(THING) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kzalloc(
-	sizeof(THING) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
)

// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@

(
  kzalloc(
-	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  kzalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  kzalloc(
-	sizeof(THING1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  kzalloc(
-	sizeof(THING1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  kzalloc(
-	sizeof(TYPE1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
|
  kzalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
)

// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@

(
  kzalloc(
-	(COUNT) * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	COUNT * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	COUNT * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	(COUNT) * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	COUNT * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	(COUNT) * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	(COUNT) * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	COUNT * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
)

// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@

(
  kzalloc(C1 * C2 * C3, ...)
|
  kzalloc(
-	(E1) * E2 * E3
+	array3_size(E1, E2, E3)
  , ...)
|
  kzalloc(
-	(E1) * (E2) * E3
+	array3_size(E1, E2, E3)
  , ...)
|
  kzalloc(
-	(E1) * (E2) * (E3)
+	array3_size(E1, E2, E3)
  , ...)
|
  kzalloc(
-	E1 * E2 * E3
+	array3_size(E1, E2, E3)
  , ...)
)

// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@

(
  kzalloc(sizeof(THING) * C2, ...)
|
  kzalloc(sizeof(TYPE) * C2, ...)
|
  kzalloc(C1 * C2 * C3, ...)
|
  kzalloc(C1 * C2, ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(TYPE) * (E2)
+	E2, sizeof(TYPE)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(TYPE) * E2
+	E2, sizeof(TYPE)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(THING) * (E2)
+	E2, sizeof(THING)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(THING) * E2
+	E2, sizeof(THING)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	(E1) * E2
+	E1, E2
  , ...)
|
- kzalloc
+ kcalloc
  (
-	(E1) * (E2)
+	E1, E2
  , ...)
|
- kzalloc
+ kcalloc
  (
-	E1 * E2
+	E1, E2
  , ...)
)

Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 16:19:22 -07:00
David Wang 13e8582245 x86/MCE: Enable MCE broadcasting on new Centaur CPUs
Newer Centaur multi-core CPUs also support MCE broadcasting to all
cores. Add a Centaur-specific init function setting that up.

 [ bp:
   - make mce_centaur_feature_init() static
   - flip check to do the f/m/s first for better readability
   - touch up text
  ]

Signed-off-by: David Wang <davidwang@zhaoxin.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: lukelin@viacpu.com
Cc: qiyuanwang@zhaoxin.com
Cc: Greg KH <greg@kroah.com>
Cc: brucechang@via-alliance.com
Cc: timguo@zhaoxin.com
Cc: cooperyan@zhaoxin.com
Cc: Tony Luck <tony.luck@intel.com>
Cc: benjaminpan@viatech.com
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1524652420-17330-2-git-send-email-davidwang@zhaoxin.com
2018-05-06 12:46:25 +02:00
Linus Torvalds d22fff8141 Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar:

 - Extend the memmap= boot parameter syntax to allow the redeclaration
   and dropping of existing ranges, and to support all e820 range types
   (Jan H. Schönherr)

 - Improve the W+X boot time security checks to remove false positive
   warnings on Xen (Jan Beulich)

 - Support booting as Xen PVH guest (Juergen Gross)

 - Improved 5-level paging (LA57) support, in particular it's possible
   now to have a single kernel image for both 4-level and 5-level
   hardware (Kirill A. Shutemov)

 - AMD hardware RAM encryption support (SME/SEV) fixes (Tom Lendacky)

 - Preparatory commits for hardware-encrypted RAM support on Intel CPUs.
   (Kirill A. Shutemov)

 - Improved Intel-MID support (Andy Shevchenko)

 - Show EFI page tables in page_tables debug files (Andy Lutomirski)

 - ... plus misc fixes and smaller cleanups

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (56 commits)
  x86/cpu/tme: Fix spelling: "configuation" -> "configuration"
  x86/boot: Fix SEV boot failure from change to __PHYSICAL_MASK_SHIFT
  x86/mm: Update comment in detect_tme() regarding x86_phys_bits
  x86/mm/32: Remove unused node_memmap_size_bytes() & CONFIG_NEED_NODE_MEMMAP_SIZE logic
  x86/mm: Remove pointless checks in vmalloc_fault
  x86/platform/intel-mid: Add special handling for ACPI HW reduced platforms
  ACPI, x86/boot: Introduce the ->reduced_hw_early_init() ACPI callback
  ACPI, x86/boot: Split out acpi_generic_reduce_hw_init() and export
  x86/pconfig: Provide defines and helper to run MKTME_KEY_PROG leaf
  x86/pconfig: Detect PCONFIG targets
  x86/tme: Detect if TME and MKTME is activated by BIOS
  x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G
  x86/boot/compressed/64: Use page table in trampoline memory
  x86/boot/compressed/64: Use stack from trampoline memory
  x86/boot/compressed/64: Make sure we have a 32-bit code segment
  x86/mm: Do not use paravirtualized calls in native_set_p4d()
  kdump, vmcoreinfo: Export pgtable_l5_enabled value
  x86/boot/compressed/64: Prepare new top-level page table for trampoline
  x86/boot/compressed/64: Set up trampoline memory
  x86/boot/compressed/64: Save and restore trampoline memory
  ...
2018-04-02 15:45:30 -07:00
Yazen Ghannam e2efacb6a5 Revert "x86/mce/AMD: Collect error info even if valid bits are not set"
This reverts commit 4b1e84276a.

Software uses the valid bits to decide if the values can be used for
further processing or other actions. So setting the valid bits will have
software act on values that it shouldn't be acting on.

The recommendation to save all the register values does not mean that
the values are always valid.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: tony.luck@intel.com
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
Cc: bp@suse.de
Cc: linux-edac@vger.kernel.org
Link: https://lkml.kernel.org/r/20180326191526.64314-1-Yazen.Ghannam@amd.com
2018-03-28 20:34:59 +02:00
Thomas Gleixner 745dd37f9d Merge branch 'x86/urgent' into x86/mm to pick up dependencies 2018-03-14 20:23:25 +01:00
Thomas Gleixner 422caa5f7a Merge branch 'ras/urgent' into ras/core
Pick up urgent fixes to apply further development changes.
2018-03-08 15:52:08 +01:00
Seunghun Han b3b7c4795c x86/MCE: Serialize sysfs changes
The check_interval file in

  /sys/devices/system/machinecheck/machinecheck<cpu number>

directory is a global timer value for MCE polling. If it is changed by one
CPU, mce_restart() broadcasts the event to other CPUs to delete and restart
the MCE polling timer and __mcheck_cpu_init_timer() reinitializes the
mce_timer variable.

If more than one CPU writes a specific value to the check_interval file
concurrently, mce_timer is not protected from such concurrent accesses and
all kinds of explosions happen. Since only root can write to those sysfs
variables, the issue is not a big deal security-wise.

However, concurrent writes to these configuration variables is void of
reason so the proper thing to do is to serialize the access with a mutex.

Boris:

 - Make store_int_with_restart() use device_store_ulong() to filter out
   negative intervals
 - Limit min interval to 1 second
 - Correct locking
 - Massage commit message

Signed-off-by: Seunghun Han <kkamagui@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20180302202706.9434-1-kkamagui@gmail.com
2018-03-08 15:36:27 +01:00
Tony Luck fa94d0c6e0 x86/MCE: Save microcode revision in machine check records
Updating microcode used to be relatively rare. Now that it has become
more common we should save the microcode version in a machine check
record to make sure that those people looking at the error have this
important information bundled with the rest of the logged information.

[ Borislav: Simplify a bit. ]

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20180301233449.24311-1-tony.luck@intel.com
2018-03-08 15:34:49 +01:00
Ingo Molnar 3f7df3efeb Linux 4.16-rc3
-----BEGIN PGP SIGNATURE-----
 
 iQFSBAABCAA8FiEEq68RxlopcLEwq+PEeb4+QwBBGIYFAlqTdg8eHHRvcnZhbGRz
 QGxpbnV4LWZvdW5kYXRpb24ub3JnAAoJEHm+PkMAQRiG10wH/iSt+OKmBdUZSAYv
 ADvfifLynLgugFYNzuijj8/gVt6b0ZIB2/wSYfdPjDErLFogis6wjnxl0lf3sEMB
 g7Oy8SE+pPPQ7587lFkg6Pj53405b6BwCbSkg8PLlwepSGiu0JmGvUYmz753tIeP
 kRIIQk/KrLlxNFixhGWNfQ9k8PqJ0NCgcbj+mTxmFkfIw2FKnBtYz72LR7Eut3Mt
 PJFh4pLKsHKlcjvX8+SehDdLwlEBv/ohDP7S7gRyR+QX1aNZhZAXyHQ0C8/tw8h6
 DnRvlTWp9EGTFxp8bYie5xcWusIcfy1eAA8yiG2kH+Mx7kLa8cmU234bHhUiu9yT
 YJSLoI4=
 =XBoV
 -----END PGP SIGNATURE-----

Merge tag 'v4.16-rc3' into x86/mm, to pick up fixes

Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-02-26 08:41:15 +01:00
Borislav Petkov 4b1e84276a x86/mce/AMD: Collect error info even if valid bits are not set
The MCA banks log error info into MCA_ADDR, MCA_MISC0, and MCA_SYND even
if the corresponding valid bits are not set:

"Error handlers should save the values in MCA_ADDR, MCA_MISC0,
and MCA_SYND even if MCA_STATUS[AddrV], MCA_STATUS[MiscV], and
MCA_STATUS[SyndV] are zero."

Do so by setting those bits so that code down the MCE processing path
doesn't need to be changed.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-5-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-02-21 17:00:54 +01:00
Borislav Petkov b2fbf6f282 x86/mce: Issue the 'mcelog --ascii' message only on !AMD
mcelog cannot decode AMD MCEs.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-4-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-02-21 17:00:53 +01:00
Borislav Petkov 0993394664 x86/mce: Convert 'struct mca_config' bools to a bitfield
... to save space when future flags are added.

No functionality change.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-3-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-02-21 17:00:53 +01:00
Kirill A. Shutemov c65e774fb3 x86/mm: Make PGDIR_SHIFT and PTRS_PER_P4D variable
For boot-time switching between 4- and 5-level paging we need to be able
to fold p4d page table level at runtime. It requires variable
PGDIR_SHIFT and PTRS_PER_P4D.

The change doesn't affect the kernel image size much:

   text	   data	    bss	    dec	    hex	filename
8628091	4734304	1368064	14730459	 e0c4db	vmlinux.before
8628393	4734340	1368064	14730797	 e0c62d	vmlinux.after

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20180214111656.88514-7-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-02-14 13:11:14 +01:00
Tony Luck fd0e786d9d x86/mm, mm/hwpoison: Don't unconditionally unmap kernel 1:1 pages
In the following commit:

  ce0fa3e56a ("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages")

... we added code to memory_failure() to unmap the page from the
kernel 1:1 virtual address space to avoid speculative access to the
page logging additional errors.

But memory_failure() may not always succeed in taking the page offline,
especially if the page belongs to the kernel.  This can happen if
there are too many corrected errors on a page and either mcelog(8)
or drivers/ras/cec.c asks to take a page offline.

Since we remove the 1:1 mapping early in memory_failure(), we can
end up with the page unmapped, but still in use. On the next access
the kernel crashes :-(

There are also various debug paths that call memory_failure() to simulate
occurrence of an error. Since there is no actual error in memory, we
don't need to map out the page for those cases.

Revert most of the previous attempt and keep the solution local to
arch/x86/kernel/cpu/mcheck/mce.c. Unmap the page only when:

	1) there is a real error
	2) memory_failure() succeeds.

All of this only applies to 64-bit systems. 32-bit kernel doesn't map
all of memory into kernel space. It isn't worth adding the code to unmap
the piece that is mapped because nobody would run a 32-bit kernel on a
machine that has recoverable machine checks.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave <dave.hansen@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert (Persistent Memory) <elliott@hpe.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Cc: stable@vger.kernel.org #v4.14
Fixes: ce0fa3e56a ("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-02-13 16:25:06 +01:00
Borislav Petkov c80c5ec1b2 x86/MCE: Fix build warning introduced by "x86: do not use print_symbol()"
The following commit:

  7b6061627e ("x86: do not use print_symbol()")

... introduced a new build warning on 32-bit x86:

  arch/x86/kernel/cpu/mcheck/mce.c:237:21: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
      pr_cont("{%pS}", (void *)m->ip);
                       ^

Fix the type mismatch between the 'void *' expected by %pS and the mce->ip
field which is u64 by casting to long.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-kernel@vger.kernel.org
Fixes: 7b6061627e ("x86: do not use print_symbol()")
Link: http://lkml.kernel.org/r/20180210145314.22174-1-bp@alien8.de
[ Cleaned up the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-02-11 11:37:39 +01:00
Linus Torvalds ab486bc9a5 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/pmladek/printk
Pull printk updates from Petr Mladek:

 - Add a console_msg_format command line option:

     The value "default" keeps the old "[time stamp] text\n" format. The
     value "syslog" allows to see the syslog-like "<log
     level>[timestamp] text" format.

     This feature was requested by people doing regression tests, for
     example, 0day robot. They want to have both filtered and full logs
     at hands.

 - Reduce the risk of softlockup:

     Pass the console owner in a busy loop.

     This is a new approach to the old problem. It was first proposed by
     Steven Rostedt on Kernel Summit 2017. It marks a context in which
     the console_lock owner calls console drivers and could not sleep.
     On the other side, printk() callers could detect this state and use
     a busy wait instead of a simple console_trylock(). Finally, the
     console_lock owner checks if there is a busy waiter at the end of
     the special context and eventually passes the console_lock to the
     waiter.

     The hand-off works surprisingly well and helps in many situations.
     Well, there is still a possibility of the softlockup, for example,
     when the flood of messages stops and the last owner still has too
     much to flush.

     There is increasing number of people having problems with
     printk-related softlockups. We might eventually need to get better
     solution. Anyway, this looks like a good start and promising
     direction.

 - Do not allow to schedule in console_unlock() called from printk():

     This reverts an older controversial commit. The reschedule helped
     to avoid softlockups. But it also slowed down the console output.
     This patch is obsoleted by the new console waiter logic described
     above. In fact, the reschedule made the hand-off less effective.

 - Deprecate "%pf" and "%pF" format specifier:

     It was needed on ia64, ppc64 and parisc64 to dereference function
     descriptors and show the real function address. It is done
     transparently by "%ps" and "pS" format specifier now.

     Sergey Senozhatsky found that all the function descriptors were in
     a special elf section and could be easily detected.

 - Remove printk_symbol() API:

     It has been obsoleted by "%pS" format specifier, and this change
     helped to remove few continuous lines and a less intuitive old API.

 - Remove redundant memsets:

     Sergey removed unnecessary memset when processing printk.devkmsg
     command line option.

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/pmladek/printk: (27 commits)
  printk: drop redundant devkmsg_log_str memsets
  printk: Never set console_may_schedule in console_trylock()
  printk: Hide console waiter logic into helpers
  printk: Add console owner and waiter logic to load balance console writes
  kallsyms: remove print_symbol() function
  checkpatch: add pF/pf deprecation warning
  symbol lookup: introduce dereference_symbol_descriptor()
  parisc64: Add .opd based function descriptor dereference
  powerpc64: Add .opd based function descriptor dereference
  ia64: Add .opd based function descriptor dereference
  sections: split dereference_function_descriptor()
  openrisc: Fix conflicting types for _exext and _stext
  lib: do not use print_symbol()
  irq debug: do not use print_symbol()
  sysfs: do not use print_symbol()
  drivers: do not use print_symbol()
  x86: do not use print_symbol()
  unicore32: do not use print_symbol()
  sh: do not use print_symbol()
  mn10300: do not use print_symbol()
  ...
2018-02-01 13:36:15 -08:00
Linus Torvalds d4173023e6 Merge branch 'siginfo-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull siginfo cleanups from Eric Biederman:
 "Long ago when 2.4 was just a testing release copy_siginfo_to_user was
  made to copy individual fields to userspace, possibly for efficiency
  and to ensure initialized values were not copied to userspace.

  Unfortunately the design was complex, it's assumptions unstated, and
  humans are fallible and so while it worked much of the time that
  design failed to ensure unitialized memory is not copied to userspace.

  This set of changes is part of a new design to clean up siginfo and
  simplify things, and hopefully make the siginfo handling robust enough
  that a simple inspection of the code can be made to ensure we don't
  copy any unitializied fields to userspace.

  The design is to unify struct siginfo and struct compat_siginfo into a
  single definition that is shared between all architectures so that
  anyone adding to the set of information shared with struct siginfo can
  see the whole picture. Hopefully ensuring all future si_code
  assignments are arch independent.

  The design is to unify copy_siginfo_to_user32 and
  copy_siginfo_from_user32 so that those function are complete and cope
  with all of the different cases documented in signinfo_layout. I don't
  think there was a single implementation of either of those functions
  that was complete and correct before my changes unified them.

  The design is to introduce a series of helpers including
  force_siginfo_fault that take the values that are needed in struct
  siginfo and build the siginfo structure for their callers. Ensuring
  struct siginfo is built correctly.

  The remaining work for 4.17 (unless someone thinks it is post -rc1
  material) is to push usage of those helpers down into the
  architectures so that architecture specific code will not need to deal
  with the fiddly work of intializing struct siginfo, and then when
  struct siginfo is guaranteed to be fully initialized change copy
  siginfo_to_user into a simple wrapper around copy_to_user.

  Further there is work in progress on the issues that have been
  documented requires arch specific knowledge to sort out.

  The changes below fix or at least document all of the issues that have
  been found with siginfo generation. Then proceed to unify struct
  siginfo the 32 bit helpers that copy siginfo to and from userspace,
  and generally clean up anything that is not arch specific with regards
  to siginfo generation.

  It is a lot but with the unification you can of siginfo you can
  already see the code reduction in the kernel"

* 'siginfo-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (45 commits)
  signal/memory-failure: Use force_sig_mceerr and send_sig_mceerr
  mm/memory_failure: Remove unused trapno from memory_failure
  signal/ptrace: Add force_sig_ptrace_errno_trap and use it where needed
  signal/powerpc: Remove unnecessary signal_code parameter of do_send_trap
  signal: Helpers for faults with specialized siginfo layouts
  signal: Add send_sig_fault and force_sig_fault
  signal: Replace memset(info,...) with clear_siginfo for clarity
  signal: Don't use structure initializers for struct siginfo
  signal/arm64: Better isolate the COMPAT_TASK portion of ptrace_hbptriggered
  ptrace: Use copy_siginfo in setsiginfo and getsiginfo
  signal: Unify and correct copy_siginfo_to_user32
  signal: Remove the code to clear siginfo before calling copy_siginfo_from_user32
  signal: Unify and correct copy_siginfo_from_user32
  signal/blackfin: Remove pointless UID16_SIGINFO_COMPAT_NEEDED
  signal/blackfin: Move the blackfin specific si_codes to asm-generic/siginfo.h
  signal/tile: Move the tile specific si_codes to asm-generic/siginfo.h
  signal/frv: Move the frv specific si_codes to asm-generic/siginfo.h
  signal/ia64: Move the ia64 specific si_codes to asm-generic/siginfo.h
  signal/powerpc: Remove redefinition of NSIGTRAP on powerpc
  signal: Move addr_lsb into the _sigfault union for clarity
  ...
2018-01-30 14:18:52 -08:00
Linus Torvalds a1c75e17e7 Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS updates from Ingo Molnar:

 - various AMD SMCA error parsing/reporting improvements (Yazen Ghannam)

 - extend Intel CMCI error reporting to more cases (Xie XiuQi)

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/MCE: Make correctable error detection look at the Deferred bit
  x86/MCE: Report only DRAM ECC as memory errors on AMD systems
  x86/MCE/AMD: Define a function to get SMCA bank type
  x86/mce/AMD: Don't set DEF_INT_TYPE in MSR_CU_DEF_ERR on SMCA systems
  x86/MCE: Extend table to report action optional errors through CMCI too
2018-01-30 11:48:44 -08:00
Eric W. Biederman 83b57531c5 mm/memory_failure: Remove unused trapno from memory_failure
Today 4 architectures set ARCH_SUPPORTS_MEMORY_FAILURE (arm64, parisc,
powerpc, and x86), while 4 other architectures set __ARCH_SI_TRAPNO
(alpha, metag, sparc, and tile).  These two sets of architectures do
not interesect so remove the trapno paramater to remove confusion.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
2018-01-23 12:17:42 -06:00
Linus Torvalds 5515114211 Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 pti fixes from Thomas Gleixner:
 "A small set of fixes for the meltdown/spectre mitigations:

   - Make kprobes aware of retpolines to prevent probes in the retpoline
     thunks.

   - Make the machine check exception speculation protected. MCE used to
     issue an indirect call directly from the ASM entry code. Convert
     that to a direct call into a C-function and issue the indirect call
     from there so the compiler can add the retpoline protection,

   - Make the vmexit_fill_RSB() assembly less stupid

   - Fix a typo in the PTI documentation"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/retpoline: Optimize inline assembler for vmexit_fill_RSB
  x86/pti: Document fix wrong index
  kprobes/x86: Disable optimizing on the function jumps to indirect thunk
  kprobes/x86: Blacklist indirect thunk functions for kprobes
  retpoline: Introduce start/end markers of indirect thunk
  x86/mce: Make machine check speculation protected
2018-01-21 10:48:35 -08:00
Thomas Gleixner 6f41c34d69 x86/mce: Make machine check speculation protected
The machine check idtentry uses an indirect branch directly from the low
level code. This evades the speculation protection.

Replace it by a direct call into C code and issue the indirect call there
so the compiler can apply the proper speculation protection.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by:Borislav Petkov <bp@alien8.de>
Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
Niced-by: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801181626290.1847@nanos
2018-01-19 16:31:28 +01:00
Sergey Senozhatsky 7b6061627e x86: do not use print_symbol()
print_symbol() is a very old API that has been obsoleted by %pS format
specifier in a normal printk() call.

Replace print_symbol() with a direct printk("%pS") call and correctly
handle continuous lines.

Link: http://lkml.kernel.org/r/20171211125025.2270-9-sergey.senozhatsky@gmail.com
To: Andrew Morton <akpm@linux-foundation.org>
To: Russell King <linux@armlinux.org.uk>
To: Catalin Marinas <catalin.marinas@arm.com>
To: Mark Salter <msalter@redhat.com>
To: Tony Luck <tony.luck@intel.com>
To: David Howells <dhowells@redhat.com>
To: Yoshinori Sato <ysato@users.sourceforge.jp>
To: Guan Xuetao <gxt@mprc.pku.edu.cn>
To: Borislav Petkov <bp@alien8.de>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: Thomas Gleixner <tglx@linutronix.de>
To: Peter Zijlstra <peterz@infradead.org>
To: Vineet Gupta <vgupta@synopsys.com>
To: Fengguang Wu <fengguang.wu@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-c6x-dev@linux-c6x.org
Cc: linux-ia64@vger.kernel.org
Cc: linux-am33-list@redhat.com
Cc: linux-sh@vger.kernel.org
Cc: linux-edac@vger.kernel.org
Cc: x86@kernel.org
Cc: linux-snps-arc@lists.infradead.org
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Borislav Petkov <bp@suse.de> # mce.c part
[pmladek@suse.com: updated commit message]
Signed-off-by: Petr Mladek <pmladek@suse.com>
2018-01-05 15:23:01 +01:00
Yazen Ghannam 179eb850ac x86/MCE: Make correctable error detection look at the Deferred bit
AMD systems may log Deferred errors. These are errors that are uncorrected
but which do not need immediate action. The MCA_STATUS[UC] bit may not be
set for Deferred errors.

Flag the error as not correctable when MCA_STATUS[Deferred] is set and
do not feed it into the Correctable Errors Collector.

[ bp: Massage commit message. ]

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20171212165143.27475-1-Yazen.Ghannam@amd.com
2017-12-18 12:58:29 +01:00
Yazen Ghannam c6708d50f1 x86/MCE: Report only DRAM ECC as memory errors on AMD systems
The MCA_STATUS[ErrorCodeExt] field is very bank type specific.
We currently check if the ErrorCodeExt value is 0x0 or 0x8 in
mce_is_memory_error(), but we don't check the bank number. This means
that we could flag non-memory errors as memory errors.

We know that we want to flag DRAM ECC errors as memory errors, so let's do
those cases first. We can add more cases later when needed.

Define a wrapper function in mce_amd.c so we can use SMCA enums.

[ bp: Remove brackets around return statements. ]

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20171207203955.118171-2-Yazen.Ghannam@amd.com
2017-12-18 12:58:29 +01:00
Kees Cook 92bb6cb140 x86/mce: Convert timers to use timer_setup()
In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly. Adjust sanity-check WARN to make sure
the triggering timer matches the current CPU timer.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@alien8.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac@vger.kernel.org
Link: https://lkml.kernel.org/r/20171005005425.GA23950@beast
2017-10-05 14:34:55 +02:00
Tony Luck ce0fa3e56a x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages
Speculative processor accesses may reference any memory that has a
valid page table entry.  While a speculative access won't generate
a machine check, it will log the error in a machine check bank. That
could cause escalation of a subsequent error since the overflow bit
will be then set in the machine check bank status register.

Code has to be double-plus-tricky to avoid mentioning the 1:1 virtual
address of the page we want to map out otherwise we may trigger the
very problem we are trying to avoid.  We use a non-canonical address
that passes through the usual Linux table walking code to get to the
same "pte".

Thanks to Dave Hansen for reviewing several iterations of this.

Also see:

  http://marc.info/?l=linux-mm&m=149860136413338&w=2

Signed-off-by: Tony Luck <tony.luck@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Elliott, Robert (Persistent Memory) <elliott@hpe.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20170816171803.28342-1-tony.luck@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-08-17 10:30:49 +02:00
Yazen Ghannam e2de64ec52 x86/mce: Always save severity in machine_check_poll()
The MCE severity gives a hint as to how to handle the error. The
notifier blocks can then use the severity to decide on an action.
It's not necessary for machine_check_poll() to filter errors for
the notifier chain, since each block will check its own set of
conditions before handling an error.

Also, there isn't any urgency for machine_check_poll() to make decisions
based on severity like in do_machine_check().

If we can assume that a severity is set then we can use it in more
notifier blocks. For example, the CEC block could check for a "KEEP"
severity rather than checking bits in the status. This isn't possible
now since the severity is not set except for "DEFFRRED/UCNA" errors with
a valid address.

Save the severity since we have it, and let the notifier blocks decide
if they want to do anything.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1498074402-98633-1-git-send-email-Yazen.Ghannam@amd.com
2017-06-26 15:58:56 +02:00
Yazen Ghannam 6057077f6e x86/mce: Update bootlog description to reflect behavior on AMD
The bootlog option is only disabled by default on AMD Fam10h and older
systems.

Update bootlog description to say this. Change the family value to hex
to avoid confusion.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170613162835.30750-9-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-06-14 07:32:10 +02:00
Yazen Ghannam ec33838244 x86/mce: Don't disable MCA banks when offlining a CPU on AMD
AMD systems have non-core, shared MCA banks within a die. These banks
are controlled by a master CPU per die. If this CPU is offlined then all
the shared banks are disabled in addition to the CPU's core banks.

Also, Fam17h systems may have SMT enabled. The MCA_CTL register is shared
between SMT thread siblings. If a CPU is offlined then all its sibling's
MCA banks are also disabled.

Extend the existing vendor check to AMD too.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
[ Fix up comment. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170613162835.30750-8-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-06-14 07:32:09 +02:00
Borislav Petkov 2d1f406139 x86/MCE: Export memory_error()
Export the function which checks whether an MCE is a memory error to
other users so that we can reuse the logic. Drop the boot_cpu_data use,
while at it, as mce.cpuvendor already has the CPU vendor in there.

Integrate a piece from a patch from Vishal Verma
<vishal.l.verma@intel.com> to export it for modules (nfit).

The main reason we're exporting it is that the nfit handler
nfit_handle_mce() needs to detect a memory error properly before doing
its recovery actions.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: <stable@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170519093915.15413-2-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2017-05-21 21:39:58 +02:00
Linus Torvalds 3dee9fb2a4 Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Ingo Molnar:
 "The main changes in this cycle were:

   - add the 'Corrected Errors Collector' kernel feature which collect
     and monitor correctable errors statistics and will preemptively
     (soft-)offline physical pages that have a suspiciously high error
     count.

   - handle MCE errors during kexec() more gracefully

   - factor out and deprecate the /dev/mcelog driver

   - ... plus misc fixes and cleanpus"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Check MCi_STATUS[MISCV] for usable addr on Intel only
  ACPI/APEI: Use setup_deferrable_timer()
  x86/mce: Update notifier priority check
  x86/mce: Enable PPIN for Knights Landing/Mill
  x86/mce: Do not register notifiers with invalid prio
  x86/mce: Factor out and deprecate the /dev/mcelog driver
  RAS: Add a Corrected Errors Collector
  x86/mce: Rename mce_log to mce_log_buffer
  x86/mce: Rename mce_log()'s argument
  x86/mce: Init some CPU features early
  x86/mce: Handle broadcasted MCE gracefully with kexec
2017-05-01 20:48:33 -07:00
Borislav Petkov c6a9583fb4 x86/mce: Check MCi_STATUS[MISCV] for usable addr on Intel only
mce_usable_address() does a bunch of basic sanity checks to verify
whether the address reported with the error is usable for further
processing. However, we do check MCi_STATUS[MISCV] and that is not
needed on AMD as that bit says that there's additional information about
the logged error in the MCi_MISCj banks.

But we don't need that to know whether the address is usable - we only
need to know whether the physical address is valid - i.e., ADDRV.

On Intel the MISCV bit is needed to perform additional checks to determine
whether the reported address is a physical one, etc.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170418183924.6agjkebilwqj26or@pd.tnic
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2017-04-19 12:04:46 +02:00
Vishal Verma 0dc9c639e6 x86/mce: Make the MCE notifier a blocking one
The NFIT MCE handler callback (for handling media errors on NVDIMMs)
takes a mutex to add the location of a memory error to a list. But since
the notifier call chain for machine checks (x86_mce_decoder_chain) is
atomic, we get a lockdep splat like:

  BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620
  in_atomic(): 1, irqs_disabled(): 0, pid: 4, name: kworker/0:0
  [..]
  Call Trace:
   dump_stack
   ___might_sleep
   __might_sleep
   mutex_lock_nested
   ? __lock_acquire
   nfit_handle_mce
   notifier_call_chain
   atomic_notifier_call_chain
   ? atomic_notifier_call_chain
   mce_gen_pool_process

Convert the notifier to a blocking one which gets to run only in process
context.

Boris: remove the notifier call in atomic context in print_mce(). For
now, let's print the MCE on the atomic path so that we can make sure
they go out and get logged at least.

Fixes: 6839a6d96f ("nfit: do an ARS scrub on hitting a latent media error")
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Cc: <stable@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170411224457.24777-1-vishal.l.verma@intel.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2017-04-18 22:23:48 +02:00
Borislav Petkov 415601b191 x86/mce: Update notifier priority check
Update the check which enforces the registration of MCE decoder notifier
callbacks with valid priority only, to include mcelog's priority.

Reported-by: kernel test robot <xiaolong.ye@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: lkp@01.org
Link: http://lkml.kernel.org/r/20170418073820.i6kl5tggcntwlisa@pd.tnic
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-04-18 10:27:52 +02:00
Borislav Petkov 32b40a82e8 x86/mce: Do not register notifiers with invalid prio
This is just a defensive precaution: do not register notifiers with a
priority which would disrupt the error handling in the notifiers with
prio higher than MCE_PRIO_EDAC.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170327093304.10683-7-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-03-28 08:55:15 +02:00
Tony Luck 5de97c9f6d x86/mce: Factor out and deprecate the /dev/mcelog driver
Move all code relating to /dev/mcelog to a separate source file.
/dev/mcelog driver can now operate from the machine check notifier with
lowest prio.

Signed-off-by: Tony Luck <tony.luck@intel.com>
[ Move the mce_helper and trigger functionality behind CONFIG_X86_MCELOG_LEGACY. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170327093304.10683-6-bp@alien8.de
[ Renamed CONFIG_X86_MCELOG to CONFIG_X86_MCELOG_LEGACY. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-03-28 08:55:01 +02:00
Borislav Petkov 011d826111 RAS: Add a Corrected Errors Collector
Introduce a simple data structure for collecting correctable errors
along with accessors. More detailed description in the code itself.

The error decoding is done with the decoding chain now and
mce_first_notifier() gets to see the error first and the CEC decides
whether to log it and then the rest of the chain doesn't hear about it -
basically the main reason for the CE collector - or to continue running
the notifiers.

When the CEC hits the action threshold, it will try to soft-offine the
page containing the ECC and then the whole decoding chain gets to see
the error.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170327093304.10683-5-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-03-28 08:54:48 +02:00
Borislav Petkov e64edfcce9 x86/mce: Rename mce_log to mce_log_buffer
It is confusing when staring at "struct mce_log mcelog" and then there's
also a function called mce_log(). So call the buffer what it is.

No functionality change.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170327093304.10683-4-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-03-28 08:54:42 +02:00
Borislav Petkov fe3ed20fdd x86/mce: Rename mce_log()'s argument
We call it everywhere "struct mce *m". Adjust that here too to avoid
confusion.

No functionality change.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170327093304.10683-3-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-03-28 08:54:38 +02:00
Ingo Molnar 4a96d1a5f0 Merge branch 'ras/urgent' into ras/core, to pick up fix
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-03-28 08:54:25 +02:00
Andi Kleen cc66afea58 x86/mce: Don't print MCEs when mcelog is active
Since:

  cd9c57cad3 ("x86/MCE: Dump MCE to dmesg if no consumers")

all MCEs are printed even when mcelog is running. Fix the regression to
not print to dmesg when mcelog is running as it is a consumer too.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
[ Massage commit message. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: stable@vger.kernel.org # 4.10..
Fixes: cd9c57cad3 ("x86/MCE: Dump MCE to dmesg if no consumers")
Link: http://lkml.kernel.org/r/20170327093304.10683-2-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-03-28 08:53:52 +02:00
Yazen Ghannam 5204bf1703 x86/mce: Init some CPU features early
When the MCA banks in __mcheck_cpu_init_generic() are polled for leftover
errors logged during boot or from the previous boot, its required to have
CPU features detected sufficiently so that the reading out and handling of
those early errors is done correctly.

If those features are not available, the decoding may miss some information
and get incomplete errors logged. For example, on SMCA systems the MCA_IPID
and MCA_SYND registers are not logged and MCA_ADDR is not masked
appropriately.

To cure that, do a subset of the basic feature detection early while the
rest happens in its usual place in __mcheck_cpu_init_vendor().

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Link: http://lkml.kernel.org/r/1489599055-20756-1-git-send-email-Yazen.Ghannam@amd.com
[ Massage commit message and simplify. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2017-03-18 13:03:44 +01:00
Xunlei Pang 5bc329503e x86/mce: Handle broadcasted MCE gracefully with kexec
When we are about to kexec a crash kernel and right then and there a
broadcasted MCE fires while we're still in the first kernel and while
the other CPUs remain in a holding pattern, the #MC handler of the
first kernel will timeout and then panic due to never completing MCE
synchronization.

Handle this in a similar way as to when the CPUs are offlined when that
broadcasted MCE happens.

[ Boris: rewrote commit message and comments. ]

Suggested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Xunlei Pang <xlpang@redhat.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Tony Luck <tony.luck@intel.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: kexec@lists.infradead.org
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1487857012-9059-1-git-send-email-xlpang@redhat.com
Link: http://lkml.kernel.org/r/20170313095019.19351-1-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2017-03-13 20:18:07 +01:00
Linus Torvalds 60c906bab1 Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Ingo Molnar:
 "The main changes in this cycle were:

  - Assign notifier chain priorities for all RAS related handlers to
    make the ordering explicit (Borislav Petkov)

  - Improve the AMD MCA banks sysfs output (Yazen Ghannam)

  - Various cleanups and restructuring of the x86 RAS code (Borislav
    Petkov)"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/ras, EDAC, acpi: Assign MCE notifier handlers a priority
  x86/ras: Get rid of mce_process_work()
  EDAC/mce/amd: Dump TSC value
  EDAC/mce/amd: Unexport amd_decode_mce()
  x86/ras/amd/inj: Change dependency
  x86/ras: Flip the TSC-adding logic
  x86/ras/amd: Make sysfs names of banks more user-friendly
  x86/ras/therm_throt: Do not log a fake MCE for thermal events
  x86/ras/inject: Make it depend on X86_LOCAL_APIC=y
2017-02-20 12:47:44 -08:00
Thomas Gleixner 0becc0ae5b x86/mce: Make timer handling more robust
Erik reported that on a preproduction hardware a CMCI storm triggers the
BUG_ON in add_timer_on(). The reason is that the per CPU MCE timer is
started by the CMCI logic before the MCE CPU hotplug callback starts the
timer with add_timer_on(). So the timer is already queued which triggers
the BUG.

Using add_timer_on() is pretty pointless in this code because the timer is
strictlty per CPU, initialized as pinned and all operations which arm the
timer happen on the CPU to which the timer belongs.

Simplify the whole machinery by using mod_timer() instead of add_timer_on()
which avoids the problem because mod_timer() can handle already queued
timers. Use __start_timer() everywhere so the earliest armed expiry time is
preserved.

Reported-by: Erik Veijola <erik.veijola@intel.com>
Tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@alien8.de>
Cc: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701310936080.3457@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2017-01-31 21:47:58 +01:00
Borislav Petkov 9026cc82b6 x86/ras, EDAC, acpi: Assign MCE notifier handlers a priority
Assign all notifiers on the MCE decode chain a priority so that they get
called in the correct order.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170123183514.13356-10-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-01-24 09:14:57 +01:00
Borislav Petkov cff4c0391a x86/ras: Get rid of mce_process_work()
Make mce_gen_pool_process() the workqueue function directly and save us
an indirection.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170123183514.13356-9-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-01-24 09:14:56 +01:00
Borislav Petkov 669c00f099 x86/ras: Flip the TSC-adding logic
Add the TSC value to the MCE record only when the MCE being logged is
precise, i.e., it is logged as an exception or an MCE-related interrupt.

So it doesn't look particularly easy to do without touching/changing a
bunch of places. That's why I'm trying tricks first.

For example, the mce-apei.c case I'm addressing by setting ->tsc only
for errors of panic severity. The idea there is, that, panic errors will
have raised an #MC and not polled.

And then instead of propagating a flag to mce_setup(), it seems
easier/less code to set ->tsc depending on the call sites, i.e.,
are we polling or are we preparing an MCE record in an exception
handler/thresholding interrupt.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170123183514.13356-5-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-01-24 09:14:54 +01:00
Borislav Petkov 9b052ea4ce x86/ras/therm_throt: Do not log a fake MCE for thermal events
We log a fake bank 128 MCE to note that we're handling a CPU thermal
event. However, this confuses people into thinking that their hardware
generates MCEs. Hijacking MCA for logging thermal events is a gross
misuse anyway and it shouldn't have been done in the first place. And
besides we have other means for dealing with thermal events which are
much more suitable.

So let's kill the MCE logging part.

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Ashok Raj <ashok.raj@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170105213846.GA12024@gmail.com
Link: http://lkml.kernel.org/r/20170123183514.13356-3-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-01-24 09:14:53 +01:00
Linus Torvalds c11a6cfb01 Merge branch 'for-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue updates from Tejun Heo:
 "Mostly patches to initialize workqueue subsystem earlier and get rid
  of keventd_up().

  The patches were headed for the last merge cycle but got delayed due
  to a bug found late minute, which is fixed now.

  Also, to help debugging, destroy_workqueue() is more chatty now on a
  sanity check failure."

* 'for-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: move wq_numa_init() to workqueue_init()
  workqueue: remove keventd_up()
  debugobj, workqueue: remove keventd_up() usage
  slab, workqueue: remove keventd_up() usage
  power, workqueue: remove keventd_up() usage
  tty, workqueue: remove keventd_up() usage
  mce, workqueue: remove keventd_up() usage
  workqueue: make workqueue available early during boot
  workqueue: dump workqueue state on sanity check failures in destroy_workqueue()
2016-12-13 12:59:57 -08:00