Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "7 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  arch/Kconfig: update HAVE_RELIABLE_STACKTRACE description
  mm, hotplug: fix page online with DEBUG_PAGEALLOC compiled but not enabled
  mm/z3fold.c: do not include rwlock.h directly
  fat: fix uninit-memory access for partial initialized inode
  mm: avoid data corruption on CoW fault into PFN-mapped VMA
  mm: fix possible PMD dirty bit lost in set_pmd_migration_entry()
  mm, numa: fix bad pmd by atomically check for pmd_trans_huge when marking page tables prot_numa

Author: Linus Torvalds
Date:   2020-03-06 07:18:36 -06:00
Commit: aeb542a1b5
8 changed files with 85 additions and 28 deletions

arch/Kconfig

@@ -738,8 +738,9 @@ config HAVE_STACK_VALIDATION
 config HAVE_RELIABLE_STACKTRACE
 	bool
 	help
-	  Architecture has a save_stack_trace_tsk_reliable() function which
-	  only returns a stack trace if it can guarantee the trace is reliable.
+	  Architecture has either save_stack_trace_tsk_reliable() or
+	  arch_stack_walk_reliable() function which only returns a stack trace
+	  if it can guarantee the trace is reliable.
 
 config HAVE_ARCH_HASH
 	bool
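
The contract this help text describes is worth spelling out: a reliable stack walker must return an error rather than a best-effort trace. Below is a compilable userspace sketch of that contract only; struct frame, the verified flag, and walk_stack_reliable() are illustrative stand-ins, not the kernel's declarations.

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct frame { unsigned long pc; bool verified; };

/* Fill entries[] only if every frame can be verified; otherwise
 * refuse with -EINVAL so callers never act on a dubious trace. */
static int walk_stack_reliable(const struct frame *frames, size_t n,
			       unsigned long *entries)
{
	for (size_t i = 0; i < n; i++) {
		if (!frames[i].verified)
			return -EINVAL;	/* refuse, don't guess */
		entries[i] = frames[i].pc;
	}
	return 0;
}

int main(void)
{
	struct frame stack[2] = { { 0x1000, true }, { 0x2000, false } };
	unsigned long entries[2];

	printf("reliable walk: %s\n",
	       walk_stack_reliable(stack, 2, entries) ? "refused" : "ok");
	return 0;
}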

fs/fat/inode.c

@@ -750,6 +750,13 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
 		return NULL;
 
 	init_rwsem(&ei->truncate_lock);
+	/* Zeroing to allow iput() even on a partially initialized inode. */
+	ei->mmu_private = 0;
+	ei->i_start = 0;
+	ei->i_logstart = 0;
+	ei->i_attrs = 0;
+	ei->i_pos = 0;
+
 	return &ei->vfs_inode;
 }
 
@@ -1374,16 +1381,6 @@ out:
 	return 0;
 }
 
-static void fat_dummy_inode_init(struct inode *inode)
-{
-	/* Initialize this dummy inode to work as no-op. */
-	MSDOS_I(inode)->mmu_private = 0;
-	MSDOS_I(inode)->i_start = 0;
-	MSDOS_I(inode)->i_logstart = 0;
-	MSDOS_I(inode)->i_attrs = 0;
-	MSDOS_I(inode)->i_pos = 0;
-}
-
 static int fat_read_root(struct inode *inode)
 {
 	struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
@@ -1844,13 +1841,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
 	fat_inode = new_inode(sb);
 	if (!fat_inode)
 		goto out_fail;
-	fat_dummy_inode_init(fat_inode);
 	sbi->fat_inode = fat_inode;
 
 	fsinfo_inode = new_inode(sb);
 	if (!fsinfo_inode)
 		goto out_fail;
-	fat_dummy_inode_init(fsinfo_inode);
 	fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
 	sbi->fsinfo_inode = fsinfo_inode;
 	insert_inode_hash(fsinfo_inode);
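
The idea behind this fix, sketched as a compilable userspace model: if setup can fail anywhere between allocation and full initialization, the teardown path must still find sane values, so zero at allocation time everything teardown will read. toy_inode, toy_alloc_inode(), and toy_evict_inode() below are hypothetical stand-ins for the VFS machinery, assuming only the reasoning in the hunk above.

#include <stdio.h>
#include <stdlib.h>

struct toy_inode {
	long i_start;		/* 0 means "no clusters to release" */
	long mmu_private;
};

static struct toy_inode *toy_alloc_inode(void)
{
	struct toy_inode *ei = malloc(sizeof(*ei));

	if (!ei)
		return NULL;
	/* Mirrors the patch: zero everything teardown might look at. */
	ei->i_start = 0;
	ei->mmu_private = 0;
	return ei;
}

static void toy_evict_inode(struct toy_inode *ei)
{
	/* Safe on a partially initialized inode: fields are all zero. */
	if (ei->i_start)
		printf("releasing clusters from %ld\n", ei->i_start);
	free(ei);
}

int main(void)
{
	struct toy_inode *ei = toy_alloc_inode();

	if (ei)
		toy_evict_inode(ei);	/* setup "failed" right after alloc */
	return 0;
}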

include/linux/mm.h

@@ -2715,6 +2715,10 @@ static inline bool debug_pagealloc_enabled_static(void)
 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP)
 extern void __kernel_map_pages(struct page *page, int numpages, int enable);
 
+/*
+ * When called in DEBUG_PAGEALLOC context, the call should most likely be
+ * guarded by debug_pagealloc_enabled() or debug_pagealloc_enabled_static()
+ */
 static inline void
 kernel_map_pages(struct page *page, int numpages, int enable)
 {

mm/huge_memory.c

@@ -3043,8 +3043,7 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
 		return;
 
 	flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
-	pmdval = *pvmw->pmd;
-	pmdp_invalidate(vma, address, pvmw->pmd);
+	pmdval = pmdp_invalidate(vma, address, pvmw->pmd);
 	if (pmd_dirty(pmdval))
 		set_page_dirty(page);
 	entry = make_migration_entry(page, pmd_write(pmdval));
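
The bug this one-liner fixes is a classic read-then-modify window: between the plain read of *pmd and the invalidation, the CPU can set the dirty bit, and the copied-out value misses it. pmdp_invalidate() instead returns the prior PMD value from the atomic exchange itself, closing the window. A minimal userspace model with C11 atomics; PMD_DIRTY and both function names are illustrative, not kernel APIs.

#include <stdatomic.h>
#include <stdio.h>

#define PMD_DIRTY 0x1UL

static _Atomic unsigned long pmd;

/* Buggy shape: hardware may set PMD_DIRTY between the load and the
 * store, and that update is silently lost. */
static unsigned long read_then_invalidate(void)
{
	unsigned long val = atomic_load(&pmd);	/* snapshot */
	/* <-- a concurrent setter can land here */
	atomic_store(&pmd, 0);			/* invalidate */
	return val;				/* may miss the dirty bit */
}

/* Fixed shape, like pmdp_invalidate(): the exchange returns the value
 * that was live at the moment of invalidation. */
static unsigned long invalidate_returning_old(void)
{
	return atomic_exchange(&pmd, 0);
}

int main(void)
{
	atomic_store(&pmd, PMD_DIRTY);
	printf("racy read: %#lx\n", read_then_invalidate());
	atomic_store(&pmd, PMD_DIRTY);
	printf("atomic exchange: %#lx\n", invalidate_returning_old());
	return 0;
}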

mm/memory.c

@@ -2257,7 +2257,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
 	bool ret;
 	void *kaddr;
 	void __user *uaddr;
-	bool force_mkyoung;
+	bool locked = false;
 	struct vm_area_struct *vma = vmf->vma;
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long addr = vmf->address;
@@ -2282,11 +2282,11 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
 	 * On architectures with software "accessed" bits, we would
 	 * take a double page fault, so mark it accessed here.
 	 */
-	force_mkyoung = arch_faults_on_old_pte() && !pte_young(vmf->orig_pte);
-	if (force_mkyoung) {
+	if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
 		pte_t entry;
 
 		vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
+		locked = true;
 		if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
 			/*
 			 * Other thread has already handled the fault
@@ -2310,18 +2310,37 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
 	 * zeroes.
 	 */
 	if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
+		if (locked)
+			goto warn;
+
+		/* Re-validate under PTL if the page is still mapped */
+		vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
+		locked = true;
+		if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
+			/* The PTE changed under us. Retry page fault. */
+			ret = false;
+			goto pte_unlock;
+		}
+
 		/*
-		 * Give a warn in case there can be some obscure
-		 * use-case
+		 * The same page can be mapped back since last copy attempt.
+		 * Try to copy again under PTL.
 		 */
-		WARN_ON_ONCE(1);
-		clear_page(kaddr);
+		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
+			/*
+			 * Give a warn in case there can be some obscure
+			 * use-case
+			 */
+warn:
+			WARN_ON_ONCE(1);
+			clear_page(kaddr);
+		}
 	}
 
 	ret = true;
 
 pte_unlock:
-	if (force_mkyoung)
+	if (locked)
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 	kunmap_atomic(kaddr);
 	flush_dcache_page(dst);
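
The shape of the retry logic above, reduced to a compilable control-flow model: copy without the PTL first; if the source faults, take the PTL, re-validate the PTE, and copy once more before warning. Everything below (cow_copy(), copy_attempt(), pte_unchanged(), the lock helpers) is a hypothetical stand-in for the kernel machinery.

#include <stdbool.h>
#include <stdio.h>

static bool locked;	/* mirrors the new "locked" flag in the hunk */

static bool pte_unchanged(void) { return true; }	/* stub: PTE re-check */
static int copy_attempt(int try) { return try == 0 ? -1 : 0; }	/* stub: 1st try faults */
static void lock_ptl(void)   { locked = true; }
static void unlock_ptl(void) { locked = false; }

static bool cow_copy(void)
{
	bool ret;

	if (copy_attempt(0)) {
		/* Already under PTL (accessed-bit path): nothing to retry. */
		if (locked)
			goto warn;

		/* Re-validate under PTL: the source may be gone for good. */
		lock_ptl();
		if (!pte_unchanged()) {
			ret = false;	/* PTE changed: let the fault retry */
			goto out;
		}

		/* The page may have been mapped back: copy again under PTL. */
		if (copy_attempt(1)) {
warn:
			fprintf(stderr, "copy failed even under PTL\n");
		}
	}
	ret = true;
out:
	if (locked)
		unlock_ptl();
	return ret;
}

int main(void)
{
	return cow_copy() ? 0 : 1;
}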

mm/memory_hotplug.c

@@ -574,7 +574,13 @@ EXPORT_SYMBOL_GPL(restore_online_page_callback);
 
 void generic_online_page(struct page *page, unsigned int order)
 {
-	kernel_map_pages(page, 1 << order, 1);
+	/*
+	 * Freeing the page with debug_pagealloc enabled will try to unmap it,
+	 * so we should map it first. This is better than introducing a special
+	 * case in page freeing fast path.
+	 */
+	if (debug_pagealloc_enabled_static())
+		kernel_map_pages(page, 1 << order, 1);
 	__free_pages_core(page, order);
 	totalram_pages_add(1UL << order);
 #ifdef CONFIG_HIGHMEM
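
A minimal runnable model of the guard being added: a debug hook that is compiled in must still be skipped when the feature was not enabled at runtime, so the onlining path does not pay for it. debug_pagealloc, map_pages(), and online_pages() are illustrative stand-ins; the real guard is a static key checked via debug_pagealloc_enabled_static().

#include <stdbool.h>
#include <stdio.h>

static bool debug_pagealloc;	/* stands in for the static key */

static void map_pages(int n)
{
	/* stands in for __kernel_map_pages(): touches page tables */
	printf("mapping %d pages\n", n);
}

static void online_pages(int n)
{
	/* Guarded: skip the page-table work entirely when the debug
	 * feature was compiled in but not enabled on the command line. */
	if (debug_pagealloc)
		map_pages(n);
	/* ... hand pages to the allocator ... */
}

int main(void)
{
	online_pages(512);		/* no mapping work: feature disabled */
	debug_pagealloc = true;
	online_pages(512);		/* now the hook runs */
	return 0;
}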

mm/mprotect.c

@@ -161,6 +161,31 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	return pages;
 }
 
+/*
+ * Used when setting automatic NUMA hinting protection where it is
+ * critical that a numa hinting PMD is not confused with a bad PMD.
+ */
+static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
+{
+	pmd_t pmdval = pmd_read_atomic(pmd);
+
+	/* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	barrier();
+#endif
+
+	if (pmd_none(pmdval))
+		return 1;
+	if (pmd_trans_huge(pmdval))
+		return 0;
+	if (unlikely(pmd_bad(pmdval))) {
+		pmd_clear_bad(pmd);
+		return 1;
+	}
+
+	return 0;
+}
+
 static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		pud_t *pud, unsigned long addr, unsigned long end,
 		pgprot_t newprot, int dirty_accountable, int prot_numa)
@@ -178,8 +203,17 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		unsigned long this_pages;
 
 		next = pmd_addr_end(addr, end);
-		if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
-				&& pmd_none_or_clear_bad(pmd))
+
+		/*
+		 * Automatic NUMA balancing walks the tables with mmap_sem
+		 * held for read. It's possible for a parallel update to
+		 * occur between pmd_trans_huge() and a pmd_none_or_clear_bad()
+		 * check, leading to a false positive and clearing.
+		 * Hence, it's necessary to atomically read the PMD value
+		 * for all the checks.
+		 */
+		if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) &&
+		     pmd_none_or_clear_bad_unless_trans_huge(pmd))
 			goto next;
 
 		/* invoke the mmu notifier if the pmd is populated */
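
The essence of pmd_none_or_clear_bad_unless_trans_huge(), modeled in userspace with C11 atomics: deciding "none / huge / bad" from two separate reads of a word that a parallel THP installation can change mid-decision is unsafe; one atomic snapshot, with every predicate applied to the same value, is not. PMD_HUGE and both helpers are illustrative only.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define PMD_HUGE 0x1UL

static _Atomic unsigned long pmd;

/* Racy shape: a THP can be installed between the two loads, so the
 * second check can misclassify a fresh huge PMD as "bad" and clear it. */
static bool racy_is_bad(void)
{
	if (atomic_load(&pmd) & PMD_HUGE)
		return false;
	/* <-- concurrent THP install can land here */
	return atomic_load(&pmd) != 0;
}

/* Fixed shape: one load, all predicates against the same snapshot,
 * as the new helper does with pmd_read_atomic(). */
static bool safe_is_bad(void)
{
	unsigned long val = atomic_load(&pmd);

	return val != 0 && !(val & PMD_HUGE);
}

int main(void)
{
	atomic_store(&pmd, PMD_HUGE);
	printf("racy: %d, safe: %d\n", racy_is_bad(), safe_is_bad());
	return 0;
}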

mm/z3fold.c

@@ -41,7 +41,6 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/rwlock.h>
 #include <linux/zpool.h>
 #include <linux/magic.h>
 