alistair23-linux/include/linux/mm_inline.h
Tony Luck fd0e786d9d x86/mm, mm/hwpoison: Don't unconditionally unmap kernel 1:1 pages
In the following commit:

  ce0fa3e56a ("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages")

... we added code to memory_failure() to unmap the page from the
kernel 1:1 virtual address space to avoid speculative access to the
page logging additional errors.

But memory_failure() may not always succeed in taking the page offline,
especially if the page belongs to the kernel.  This can happen if
there are too many corrected errors on a page and either mcelog(8)
or drivers/ras/cec.c asks to take a page offline.

Since we remove the 1:1 mapping early in memory_failure(), we can
end up with the page unmapped, but still in use. On the next access
the kernel crashes :-(

There are also various debug paths that call memory_failure() to simulate
occurrence of an error. Since there is no actual error in memory, we
don't need to map out the page for those cases.

Revert most of the previous attempt and keep the solution local to
arch/x86/kernel/cpu/mcheck/mce.c. Unmap the page only when:

	1) there is a real error
	2) memory_failure() succeeds.

All of this only applies to 64-bit systems. 32-bit kernel doesn't map
all of memory into kernel space. It isn't worth adding the code to unmap
the piece that is mapped because nobody would run a 32-bit kernel on a
machine that has recoverable machine checks.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave <dave.hansen@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert (Persistent Memory) <elliott@hpe.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Cc: stable@vger.kernel.org #v4.14
Fixes: ce0fa3e56a ("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-02-13 16:25:06 +01:00

131 lines
3.4 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_MM_INLINE_H
#define LINUX_MM_INLINE_H
#include <linux/huge_mm.h>
#include <linux/swap.h>
/**
* page_is_file_cache - should the page be on a file LRU or anon LRU?
* @page: the page to test
*
* Returns 1 if @page is page cache page backed by a regular filesystem,
* or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
* Used by functions that manipulate the LRU lists, to sort a page
* onto the right LRU list.
*
* We would like to get this info without a page flag, but the state
* needs to survive until the page is last deleted from the LRU, which
* could be as far down as __page_cache_release.
*/
static inline int page_is_file_cache(struct page *page)
{
return !PageSwapBacked(page);
}
static __always_inline void __update_lru_size(struct lruvec *lruvec,
enum lru_list lru, enum zone_type zid,
int nr_pages)
{
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
__mod_node_page_state(pgdat, NR_LRU_BASE + lru, nr_pages);
__mod_zone_page_state(&pgdat->node_zones[zid],
NR_ZONE_LRU_BASE + lru, nr_pages);
}
static __always_inline void update_lru_size(struct lruvec *lruvec,
enum lru_list lru, enum zone_type zid,
int nr_pages)
{
__update_lru_size(lruvec, lru, zid, nr_pages);
#ifdef CONFIG_MEMCG
mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
#endif
}
static __always_inline void add_page_to_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
list_add(&page->lru, &lruvec->lists[lru]);
}
static __always_inline void add_page_to_lru_list_tail(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
list_add_tail(&page->lru, &lruvec->lists[lru]);
}
static __always_inline void del_page_from_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
list_del(&page->lru);
update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
}
/**
* page_lru_base_type - which LRU list type should a page be on?
* @page: the page to test
*
* Used for LRU list index arithmetic.
*
* Returns the base LRU type - file or anon - @page should be on.
*/
static inline enum lru_list page_lru_base_type(struct page *page)
{
if (page_is_file_cache(page))
return LRU_INACTIVE_FILE;
return LRU_INACTIVE_ANON;
}
/**
* page_off_lru - which LRU list was page on? clearing its lru flags.
* @page: the page to test
*
* Returns the LRU list a page was on, as an index into the array of LRU
* lists; and clears its Unevictable or Active flags, ready for freeing.
*/
static __always_inline enum lru_list page_off_lru(struct page *page)
{
enum lru_list lru;
if (PageUnevictable(page)) {
__ClearPageUnevictable(page);
lru = LRU_UNEVICTABLE;
} else {
lru = page_lru_base_type(page);
if (PageActive(page)) {
__ClearPageActive(page);
lru += LRU_ACTIVE;
}
}
return lru;
}
/**
* page_lru - which LRU list should a page be on?
* @page: the page to test
*
* Returns the LRU list a page should be on, as an index
* into the array of LRU lists.
*/
static __always_inline enum lru_list page_lru(struct page *page)
{
enum lru_list lru;
if (PageUnevictable(page))
lru = LRU_UNEVICTABLE;
else {
lru = page_lru_base_type(page);
if (PageActive(page))
lru += LRU_ACTIVE;
}
return lru;
}
#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
#endif