diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 7f5b2a2d3861..d5562f9ce600 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -168,8 +168,13 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->range_start = start; + tlb->range_end = end; + } + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 93cadc04ac62..cbe5ac3699bf 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -187,8 +187,10 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, */ static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) + tlb->need_flush = 1; /* * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and * tlb->end_addr. diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index d574d0820dc8..2eb8ff0d6fca 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -77,8 +77,13 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->start = start; + tlb->end = end; + } + tlb_flush_mmu(tlb); } diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 89786560dbd4..51a8bc967e75 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -49,9 +49,9 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { - if (tlb->fullmm) + if (tlb->fullmm || force) flush_tlb_mm(tlb->mm); /* keep the page table cache within bounds */ diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 2a901eca7145..344d95619d03 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -87,8 +87,13 @@ tlb_flush_mmu(struct mmu_gather *tlb) */ static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->start = start; + tlb->end = end; + tlb->need_flush = 1; + } tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 8f71521e7a44..faddde44de8c 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -116,7 +116,7 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, bool force); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 892a7b0196fd..3cadee0a3508 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -538,6 +538,14 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm) return atomic_read(&mm->tlb_flush_pending) > 0; } +/* + * Returns true if there are two above TLB batching threads in parallel. + */ +static inline bool mm_tlb_flush_nested(struct mm_struct *mm) +{ + return atomic_read(&mm->tlb_flush_pending) > 1; +} + static inline void init_tlb_flush_pending(struct mm_struct *mm) { atomic_set(&mm->tlb_flush_pending, 0); diff --git a/mm/memory.c b/mm/memory.c index 34cba5113e06..e158f7ac6730 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -272,10 +272,13 @@ void tlb_flush_mmu(struct mmu_gather *tlb) * that were required. */ void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { struct mmu_gather_batch *batch, *next; + if (force) + __tlb_adjust_range(tlb, start, end - start); + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ @@ -404,12 +407,23 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { arch_tlb_gather_mmu(tlb, mm, start, end); + inc_tlb_flush_pending(tlb->mm); } void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - arch_tlb_finish_mmu(tlb, start, end); + /* + * If there are parallel threads are doing PTE changes on same range + * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB + * flush by batching, a thread has stable TLB entry can fail to flush + * the TLB by observing pte_none|!pte_dirty, for example so flush TLB + * forcefully if we detect parallel PTE batching threads. + */ + bool force = mm_tlb_flush_nested(tlb->mm); + + arch_tlb_finish_mmu(tlb, start, end, force); + dec_tlb_flush_pending(tlb->mm); } /*