mm: avoid data corruption on CoW fault into PFN-mapped VMA

[ Upstream commit c3e5ea6ee5 ]

Jeff Moyer has reported that one of the xfstests triggers a warning when run
on a DAX-enabled filesystem:

	WARNING: CPU: 76 PID: 51024 at mm/memory.c:2317 wp_page_copy+0xc40/0xd50
	...
	wp_page_copy+0x98c/0xd50 (unreliable)
	do_wp_page+0xd8/0xad0
	__handle_mm_fault+0x748/0x1b90
	handle_mm_fault+0x120/0x1f0
	__do_page_fault+0x240/0xd70
	do_page_fault+0x38/0xd0
	handle_page_fault+0x10/0x30

The warning happens when __copy_from_user_inatomic() fails while trying to
copy data into the CoW page.

This happens because of a race between MADV_DONTNEED and the CoW page fault
(a reproducer sketch follows the diagram):

	CPU0					CPU1
 handle_mm_fault()
   do_wp_page()
     wp_page_copy()
       cow_user_page()
					madvise(MADV_DONTNEED)
					  zap_page_range()
					    zap_pte_range()
					      ptep_get_and_clear_full()
					      <TLB flush>
	 __copy_from_user_inatomic()
	 sees empty PTE and fails
	 WARN_ON_ONCE(1)
	 clear_page()

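The window is narrow, but the ingredients are simple: a private (CoW) mapping
of a file whose pages are PFN-mapped, a racing write, and a racing
MADV_DONTNEED. A minimal, hypothetical reproducer sketch follows; this is not
Jeff's actual xfstest, the file name "testfile" and the loop structure are
illustrative, and it assumes the file lives on a DAX mount:

/*
 * Hypothetical reproducer sketch -- not the original xfstest.  Assumes
 * "testfile" lives on a DAX-enabled filesystem, so the file pages are
 * PFN-mapped.  Thread A touches the page read-only and then writes it,
 * forcing wp_page_copy()/cow_user_page(); thread B keeps zapping the
 * PTE with MADV_DONTNEED.  Error handling omitted; build with -lpthread.
 */
#include <fcntl.h>
#include <pthread.h>
#include <sys/mman.h>
#include <unistd.h>

static volatile char *map;
static long pagesize;

static void *writer(void *arg)
{
	for (;;) {
		(void)map[0];	/* read fault maps the file page read-only */
		map[0] = 1;	/* write fault takes the CoW path */
	}
	return NULL;
}

static void *zapper(void *arg)
{
	for (;;)		/* clears the PTE under the racing CoW fault */
		madvise((void *)map, pagesize, MADV_DONTNEED);
	return NULL;
}

int main(void)
{
	pthread_t a, b;
	int fd = open("testfile", O_RDWR);

	pagesize = sysconf(_SC_PAGESIZE);
	map = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE, fd, 0);	/* private mapping: writes are CoW */

	pthread_create(&a, NULL, writer, NULL);
	pthread_create(&b, NULL, zapper, NULL);
	pause();		/* run until the WARN fires (or forever) */
	return 0;
}

The read before the write matters: it maps the file page read-only first, so
the write goes through do_wp_page()/wp_page_copy() rather than an ordinary
first-touch fault.
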
The solution is to retry __copy_from_user_inatomic() under the PTL, after
checking that the PTE still matches orig_pte.

The second copy attempt can still fail, for example due to a non-readable
PTE, but there's nothing reasonable we can do about it except clear the CoW
page.
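
Structurally this is the familiar optimistic-retry idiom: try the copy with
no lock held; on failure, take the PTL, check that the PTE still matches
orig_pte (if not, return false and let the fault be retried), and only then
retry the copy, zeroing the page as a last resort. A compilable userspace
analogue of that shape, with invented names (cow_copy(), copy_inatomic(),
and a generation counter standing in for pte_same() on orig_pte):

#include <pthread.h>
#include <stdbool.h>
#include <string.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long generation;	/* bumped by every "zap", under lock */
static bool src_valid = true;		/* cleared by "zap": an empty PTE */
static char src_buf[4096];

/* May fail if a concurrent "zap" hit: like __copy_from_user_inatomic(). */
static bool copy_inatomic(char *dst)
{
	if (!src_valid)
		return false;
	memcpy(dst, src_buf, sizeof(src_buf));
	return true;
}

/* Returns false if the caller must retry the whole "fault". */
static bool cow_copy(char *dst, unsigned long orig_gen)
{
	bool ret = true;

	if (copy_inatomic(dst))
		return true;		/* fast path: no race */

	pthread_mutex_lock(&lock);
	if (generation != orig_gen) {
		ret = false;		/* state changed under us: retry */
	} else if (!copy_inatomic(dst)) {
		/* Source may have come back; if not, zero-fill (clear_page()). */
		memset(dst, 0, sizeof(src_buf));
	}
	pthread_mutex_unlock(&lock);
	return ret;
}

int main(void)
{
	char dst[4096];
	return cow_copy(dst, generation) ? 0 : 1;
}

The generation check plays the role of pte_same(): the locked retry is only
valid if the state the fault was taken against has not changed underneath us.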

Reported-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Jeff Moyer <jmoyer@redhat.com>
Cc: <stable@vger.kernel.org>
Cc: Justin He <Justin.He@arm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Link: http://lkml.kernel.org/r/20200218154151.13349-1-kirill.shutemov@linux.intel.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>

--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2163,7 +2163,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
 	bool ret;
 	void *kaddr;
 	void __user *uaddr;
-	bool force_mkyoung;
+	bool locked = false;
 	struct vm_area_struct *vma = vmf->vma;
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long addr = vmf->address;
@@ -2188,11 +2188,11 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
 	 * On architectures with software "accessed" bits, we would
 	 * take a double page fault, so mark it accessed here.
 	 */
-	force_mkyoung = arch_faults_on_old_pte() && !pte_young(vmf->orig_pte);
-	if (force_mkyoung) {
+	if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
 		pte_t entry;
 
 		vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
+		locked = true;
 		if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
 			/*
 			 * Other thread has already handled the fault
@@ -2216,18 +2216,37 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
 	 * zeroes.
 	 */
 	if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
+		if (locked)
+			goto warn;
+
+		/* Re-validate under PTL if the page is still mapped */
+		vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
+		locked = true;
+		if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
+			/* The PTE changed under us. Retry page fault. */
+			ret = false;
+			goto pte_unlock;
+		}
+
 		/*
-		 * Give a warn in case there can be some obscure
-		 * use-case
+		 * The same page can be mapped back since last copy attempt.
+		 * Try to copy again under PTL.
 		 */
-		WARN_ON_ONCE(1);
-		clear_page(kaddr);
+		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
+			/*
+			 * Give a warn in case there can be some obscure
+			 * use-case
+			 */
+warn:
+			WARN_ON_ONCE(1);
+			clear_page(kaddr);
+		}
 	}
 
 	ret = true;
 
 pte_unlock:
-	if (force_mkyoung)
+	if (locked)
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 	kunmap_atomic(kaddr);
 	flush_dcache_page(dst);