diff --git a/fs/dax.c b/fs/dax.c index cc025f82ef07..014defd2e744 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1036,7 +1036,7 @@ int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, if (!write && !buffer_mapped(&bh)) { spinlock_t *ptl; pmd_t entry; - struct page *zero_page = get_huge_zero_page(); + struct page *zero_page = mm_get_huge_zero_page(vma->vm_mm); if (unlikely(!zero_page)) { dax_pmd_dbg(&bh, address, "no zero page"); diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 4fca5263fd42..9b9f65d99873 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -156,8 +156,8 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) return is_huge_zero_page(pmd_page(pmd)); } -struct page *get_huge_zero_page(void); -void put_huge_zero_page(void); +struct page *mm_get_huge_zero_page(struct mm_struct *mm); +void mm_put_huge_zero_page(struct mm_struct *mm); #define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) @@ -220,9 +220,9 @@ static inline bool is_huge_zero_page(struct page *page) return false; } -static inline void put_huge_zero_page(void) +static inline void mm_put_huge_zero_page(struct mm_struct *mm) { - BUILD_BUG(); + return; } static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, diff --git a/include/linux/sched.h b/include/linux/sched.h index 6bee6f988912..348f51b0ec92 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -526,6 +526,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ +#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) diff --git a/kernel/fork.c b/kernel/fork.c index 9a8ec66cd4df..6d42242485cb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -854,6 +854,7 @@ static inline void __mmput(struct mm_struct *mm) ksm_exit(mm); khugepaged_exit(mm); /* must run before exit_mmap */ exit_mmap(mm); + mm_put_huge_zero_page(mm); set_mm_exe_file(mm, NULL); if (!list_empty(&mm->mmlist)) { spin_lock(&mmlist_lock); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a0b0e562407d..12b9f1a39b63 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -59,7 +59,7 @@ static struct shrinker deferred_split_shrinker; static atomic_t huge_zero_refcount; struct page *huge_zero_page __read_mostly; -struct page *get_huge_zero_page(void) +static struct page *get_huge_zero_page(void) { struct page *zero_page; retry: @@ -86,7 +86,7 @@ retry: return READ_ONCE(huge_zero_page); } -void put_huge_zero_page(void) +static void put_huge_zero_page(void) { /* * Counter should never go to zero here. Only shrinker can put @@ -95,6 +95,26 @@ void put_huge_zero_page(void) BUG_ON(atomic_dec_and_test(&huge_zero_refcount)); } +struct page *mm_get_huge_zero_page(struct mm_struct *mm) +{ + if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags)) + return READ_ONCE(huge_zero_page); + + if (!get_huge_zero_page()) + return NULL; + + if (test_and_set_bit(MMF_HUGE_ZERO_PAGE, &mm->flags)) + put_huge_zero_page(); + + return READ_ONCE(huge_zero_page); +} + +void mm_put_huge_zero_page(struct mm_struct *mm) +{ + if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags)) + put_huge_zero_page(); +} + static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink, struct shrink_control *sc) { @@ -644,7 +664,7 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe) pgtable = pte_alloc_one(vma->vm_mm, haddr); if (unlikely(!pgtable)) return VM_FAULT_OOM; - zero_page = get_huge_zero_page(); + zero_page = mm_get_huge_zero_page(vma->vm_mm); if (unlikely(!zero_page)) { pte_free(vma->vm_mm, pgtable); count_vm_event(THP_FAULT_FALLBACK); @@ -666,10 +686,8 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe) } } else spin_unlock(fe->ptl); - if (!set) { + if (!set) pte_free(vma->vm_mm, pgtable); - put_huge_zero_page(); - } return ret; } gfp = alloc_hugepage_direct_gfpmask(vma); @@ -823,7 +841,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, * since we already have a zero page to copy. It just takes a * reference. */ - zero_page = get_huge_zero_page(); + zero_page = mm_get_huge_zero_page(dst_mm); set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd, zero_page); ret = 0; @@ -1081,7 +1099,6 @@ alloc: update_mmu_cache_pmd(vma, fe->address, fe->pmd); if (!page) { add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); - put_huge_zero_page(); } else { VM_BUG_ON_PAGE(!PageHead(page), page); page_remove_rmap(page, true); @@ -1542,7 +1559,6 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, } smp_wmb(); /* make pte visible before pmd */ pmd_populate(mm, pmd, pgtable); - put_huge_zero_page(); } static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, @@ -1565,8 +1581,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, if (!vma_is_anonymous(vma)) { _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); - if (is_huge_zero_pmd(_pmd)) - put_huge_zero_page(); if (vma_is_dax(vma)) return; page = pmd_page(_pmd); diff --git a/mm/swap.c b/mm/swap.c index 75c63bb2a1da..4dcf852e1e6d 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -748,10 +748,8 @@ void release_pages(struct page **pages, int nr, bool cold) locked_pgdat = NULL; } - if (is_huge_zero_page(page)) { - put_huge_zero_page(); + if (is_huge_zero_page(page)) continue; - } page = compound_head(page); if (!put_page_testzero(page)) diff --git a/mm/swap_state.c b/mm/swap_state.c index 268b8191982b..8679c997eab6 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -254,9 +254,7 @@ static inline void free_swap_cache(struct page *page) void free_page_and_swap_cache(struct page *page) { free_swap_cache(page); - if (is_huge_zero_page(page)) - put_huge_zero_page(); - else + if (!is_huge_zero_page(page)) put_page(page); }