diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index ba3fff06fad1..1d97fe1c0e53 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -326,7 +326,7 @@ static void smp_ext_bitcall(int cpu, ec_bit_sig sig) */ void smp_ptlb_callback(void *info) { - local_flush_tlb(); + __tlb_flush_local(); } void smp_ptlb_all(void) diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index e45d3c9a4b7e..6cbbfe4f6749 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h @@ -82,7 +82,6 @@ static inline void pgd_free(pgd_t *pgd) */ #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(x) do { } while (0) -#define __pmd_free_tlb(tlb,x) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() #define pgd_populate_kernel(mm, pmd, pte) BUG() #else /* __s390x__ */ @@ -118,12 +117,6 @@ static inline void pmd_free (pmd_t *pmd) free_pages((unsigned long) pmd, PMD_ALLOC_ORDER); } -#define __pmd_free_tlb(tlb,pmd) \ - do { \ - tlb_flush_mmu(tlb, 0, 0); \ - pmd_free(pmd); \ - } while (0) - static inline void pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) { @@ -224,14 +217,4 @@ static inline void pte_free(struct page *pte) __free_page(pte); } -#define __pte_free_tlb(tlb, pte) \ -({ \ - struct mmu_gather *__tlb = (tlb); \ - struct page *__pte = (pte); \ - struct page *shadow_page = get_shadow_page(__pte); \ - if (shadow_page) \ - tlb_remove_page(__tlb, shadow_page); \ - tlb_remove_page(__tlb, __pte); \ -}) - #endif /* _S390_PGALLOC_H */ diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 39bb5192dc31..b424ab21f8bd 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -424,7 +424,8 @@ static inline pgd_t *get_shadow_pgd(pgd_t *pgdp) * within a page table are directly modified. Thus, the following * hook is made available. */ -static inline void set_pte(pte_t *pteptr, pte_t pteval) +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *pteptr, pte_t pteval) { pte_t *shadow_pte = get_shadow_pte(pteptr); @@ -437,7 +438,6 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) pte_val(*shadow_pte) = _PAGE_TYPE_EMPTY; } } -#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) /* * pgd/pmd/pte query functions @@ -508,7 +508,8 @@ static inline int pte_file(pte_t pte) return (pte_val(pte) & mask) == _PAGE_TYPE_FILE; } -#define pte_same(a,b) (pte_val(a) == pte_val(b)) +#define __HAVE_ARCH_PTE_SAME +#define pte_same(a,b) (pte_val(a) == pte_val(b)) /* * query functions pte_write/pte_dirty/pte_young only work if @@ -663,24 +664,19 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) { return 0; } -static inline int -ptep_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +static inline int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) { /* No need to flush TLB; bits are in storage key */ - return ptep_test_and_clear_young(vma, address, ptep); -} - -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - pte_t pte = *ptep; - pte_clear(mm, addr, ptep); - return pte; + return 0; } static inline void __ptep_ipte(unsigned long address, pte_t *ptep) @@ -709,6 +705,32 @@ static inline void ptep_invalidate(unsigned long address, pte_t *ptep) __ptep_ipte(address, ptep); } +/* + * This is hard to understand. ptep_get_and_clear and ptep_clear_flush + * both clear the TLB for the unmapped pte. The reason is that + * ptep_get_and_clear is used in common code (e.g. change_pte_range) + * to modify an active pte. The sequence is + * 1) ptep_get_and_clear + * 2) set_pte_at + * 3) flush_tlb_range + * On s390 the tlb needs to get flushed with the modification of the pte + * if the pte is active. The only way how this can be implemented is to + * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range + * is a nop. + */ +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define ptep_get_and_clear(__mm, __address, __ptep) \ +({ \ + pte_t __pte = *(__ptep); \ + if (atomic_read(&(__mm)->mm_users) > 1 || \ + (__mm) != current->active_mm) \ + ptep_invalidate(__address, __ptep); \ + else \ + pte_clear((__mm), (__address), (__ptep)); \ + __pte; \ +}) + +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { @@ -717,12 +739,40 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, return pte; } -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +/* + * The batched pte unmap code uses ptep_get_and_clear_full to clear the + * ptes. Here an optimization is possible. tlb_gather_mmu flushes all + * tlbs of an mm if it can guarantee that the ptes of the mm_struct + * cannot be accessed while the batched unmap is running. In this case + * full==1 and a simple pte_clear is enough. See tlb.h. + */ +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL +static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, int full) { - pte_t old_pte = *ptep; - set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); + pte_t pte = *ptep; + + if (full) + pte_clear(mm, addr, ptep); + else + ptep_invalidate(addr, ptep); + return pte; } +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +#define ptep_set_wrprotect(__mm, __addr, __ptep) \ +({ \ + pte_t __pte = *(__ptep); \ + if (pte_write(__pte)) { \ + if (atomic_read(&(__mm)->mm_users) > 1 || \ + (__mm) != current->active_mm) \ + ptep_invalidate(__addr, __ptep); \ + set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \ + } \ +}) + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS #define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ ({ \ int __changed = !pte_same(*(__ptep), __entry); \ @@ -740,11 +790,13 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, * should therefore only be called if it is not mapped in any * address space. */ +#define __HAVE_ARCH_PAGE_TEST_DIRTY static inline int page_test_dirty(struct page *page) { return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0; } +#define __HAVE_ARCH_PAGE_CLEAR_DIRTY static inline void page_clear_dirty(struct page *page) { page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY); @@ -753,6 +805,7 @@ static inline void page_clear_dirty(struct page *page) /* * Test and clear referenced bit in storage key. */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG static inline int page_test_and_clear_young(struct page *page) { unsigned long physpage = page_to_phys(page); @@ -930,16 +983,6 @@ extern int remove_shared_memory(unsigned long start, unsigned long size); #define __HAVE_ARCH_MEMMAP_INIT extern void memmap_init(unsigned long, int, unsigned long, unsigned long); -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define __HAVE_ARCH_PTEP_CLEAR_FLUSH -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -#define __HAVE_ARCH_PTE_SAME -#define __HAVE_ARCH_PAGE_TEST_DIRTY -#define __HAVE_ARCH_PAGE_CLEAR_DIRTY -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG #include #endif /* _S390_PAGE_H */ diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h index 51bd957b85bd..55ae45ef31b5 100644 --- a/include/asm-s390/tlb.h +++ b/include/asm-s390/tlb.h @@ -2,19 +2,128 @@ #define _S390_TLB_H /* - * s390 doesn't need any special per-pte or - * per-vma handling.. + * TLB flushing on s390 is complicated. The following requirement + * from the principles of operation is the most arduous: + * + * "A valid table entry must not be changed while it is attached + * to any CPU and may be used for translation by that CPU except to + * (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY, + * or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page + * table entry, or (3) make a change by means of a COMPARE AND SWAP + * AND PURGE instruction that purges the TLB." + * + * The modification of a pte of an active mm struct therefore is + * a two step process: i) invalidate the pte, ii) store the new pte. + * This is true for the page protection bit as well. + * The only possible optimization is to flush at the beginning of + * a tlb_gather_mmu cycle if the mm_struct is currently not in use. + * + * Pages used for the page tables is a different story. FIXME: more */ -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) + +#include +#include +#include +#include +#include +#include + +#ifndef CONFIG_SMP +#define TLB_NR_PTRS 1 +#else +#define TLB_NR_PTRS 508 +#endif + +struct mmu_gather { + struct mm_struct *mm; + unsigned int fullmm; + unsigned int nr_ptes; + unsigned int nr_pmds; + void *array[TLB_NR_PTRS]; +}; + +DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); + +static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, + unsigned int full_mm_flush) +{ + struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); + + tlb->mm = mm; + tlb->fullmm = full_mm_flush || (num_online_cpus() == 1) || + (atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm); + tlb->nr_ptes = 0; + tlb->nr_pmds = TLB_NR_PTRS; + if (tlb->fullmm) + __tlb_flush_mm(mm); + return tlb; +} + +static inline void tlb_flush_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pmds < TLB_NR_PTRS)) + __tlb_flush_mm(tlb->mm); + while (tlb->nr_ptes > 0) + pte_free(tlb->array[--tlb->nr_ptes]); + while (tlb->nr_pmds < TLB_NR_PTRS) + pmd_free((pmd_t *) tlb->array[tlb->nr_pmds++]); +} + +static inline void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + tlb_flush_mmu(tlb, start, end); + + /* keep the page table cache within bounds */ + check_pgt_cache(); + + put_cpu_var(mmu_gathers); +} /* - * .. because we flush the whole mm when it - * fills up. + * Release the page cache reference for a pte removed by + * tlb_ptep_clear_flush. In both flush modes the tlb fo a page cache page + * has already been freed, so just do free_page_and_swap_cache. */ -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + free_page_and_swap_cache(page); +} -#include +/* + * pte_free_tlb frees a pte table and clears the CRSTE for the + * page table from the tlb. + */ +static inline void pte_free_tlb(struct mmu_gather *tlb, struct page *page) +{ + if (!tlb->fullmm) { + tlb->array[tlb->nr_ptes++] = page; + if (tlb->nr_ptes >= tlb->nr_pmds) + tlb_flush_mmu(tlb, 0, 0); + } else + pte_free(page); +} +/* + * pmd_free_tlb frees a pmd table and clears the CRSTE for the + * segment table entry from the tlb. + */ +static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) +{ +#ifdef __s390x__ + if (!tlb->fullmm) { + tlb->array[--tlb->nr_pmds] = (struct page *) pmd; + if (tlb->nr_ptes >= tlb->nr_pmds) + tlb_flush_mmu(tlb, 0, 0); + } else + pmd_free(pmd); #endif +} + +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) +#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0) +#define tlb_migrate_finish(mm) do { } while (0) + +#endif /* _S390_TLB_H */ diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h index 6de2632a3e4f..3a9985fbc8af 100644 --- a/include/asm-s390/tlbflush.h +++ b/include/asm-s390/tlbflush.h @@ -6,68 +6,19 @@ #include /* - * TLB flushing: - * - * - flush_tlb() flushes the current mm struct TLBs - * - flush_tlb_all() flushes all processes TLBs - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page - * - flush_tlb_range(vma, start, end) flushes a range of pages - * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages + * Flush all tlb entries on the local cpu. */ +static inline void __tlb_flush_local(void) +{ + asm volatile("ptlb" : : : "memory"); +} /* - * S/390 has three ways of flushing TLBs - * 'ptlb' does a flush of the local processor - * 'csp' flushes the TLBs on all PUs of a SMP - * 'ipte' invalidates a pte in a page table and flushes that out of - * the TLBs of all PUs of a SMP + * Flush all tlb entries on all cpus. */ - -#define local_flush_tlb() \ -do { asm volatile("ptlb": : :"memory"); } while (0) - -#ifndef CONFIG_SMP - -/* - * We always need to flush, since s390 does not flush tlb - * on each context switch - */ - -static inline void flush_tlb(void) -{ - local_flush_tlb(); -} -static inline void flush_tlb_all(void) -{ - local_flush_tlb(); -} -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - local_flush_tlb(); -} -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - local_flush_tlb(); -} -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - local_flush_tlb(); -} - -#define flush_tlb_kernel_range(start, end) \ - local_flush_tlb(); - -#else - -#include - -extern void smp_ptlb_all(void); - -static inline void global_flush_tlb(void) +static inline void __tlb_flush_global(void) { + extern void smp_ptlb_all(void); register unsigned long reg2 asm("2"); register unsigned long reg3 asm("3"); register unsigned long reg4 asm("4"); @@ -89,66 +40,75 @@ static inline void global_flush_tlb(void) } /* - * We only have to do global flush of tlb if process run since last - * flush on any other pu than current. - * If we have threads (mm->count > 1) we always do a global flush, - * since the process runs on more than one processor at the same time. + * Flush all tlb entries of a page table on all cpus. */ +static inline void __tlb_flush_idte(pgd_t *pgd) +{ + asm volatile( + " .insn rrf,0xb98e0000,0,%0,%1,0" + : : "a" (2048), "a" (__pa(pgd) & PAGE_MASK) : "cc" ); +} -static inline void __flush_tlb_mm(struct mm_struct * mm) +static inline void __tlb_flush_mm(struct mm_struct * mm) { cpumask_t local_cpumask; if (unlikely(cpus_empty(mm->cpu_vm_mask))) return; + /* + * If the machine has IDTE we prefer to do a per mm flush + * on all cpus instead of doing a local flush if the mm + * only ran on the local cpu. + */ if (MACHINE_HAS_IDTE) { pgd_t *shadow_pgd = get_shadow_pgd(mm->pgd); - if (shadow_pgd) { - asm volatile( - " .insn rrf,0xb98e0000,0,%0,%1,0" - : : "a" (2048), - "a" (__pa(shadow_pgd) & PAGE_MASK) : "cc" ); - } - asm volatile( - " .insn rrf,0xb98e0000,0,%0,%1,0" - : : "a" (2048), "a" (__pa(mm->pgd)&PAGE_MASK) : "cc"); + if (shadow_pgd) + __tlb_flush_idte(shadow_pgd); + __tlb_flush_idte(mm->pgd); return; } preempt_disable(); + /* + * If the process only ran on the local cpu, do a local flush. + */ local_cpumask = cpumask_of_cpu(smp_processor_id()); if (cpus_equal(mm->cpu_vm_mask, local_cpumask)) - local_flush_tlb(); + __tlb_flush_local(); else - global_flush_tlb(); + __tlb_flush_global(); preempt_enable(); } -static inline void flush_tlb(void) +static inline void __tlb_flush_mm_cond(struct mm_struct * mm) { - __flush_tlb_mm(current->mm); -} -static inline void flush_tlb_all(void) -{ - global_flush_tlb(); -} -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - __flush_tlb_mm(mm); -} -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - __flush_tlb_mm(vma->vm_mm); -} -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - __flush_tlb_mm(vma->vm_mm); + if (atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm) + __tlb_flush_mm(mm); } -#define flush_tlb_kernel_range(start, end) global_flush_tlb() +/* + * TLB flushing: + * flush_tlb() - flushes the current mm struct TLBs + * flush_tlb_all() - flushes all processes TLBs + * flush_tlb_mm(mm) - flushes the specified mm context TLB's + * flush_tlb_page(vma, vmaddr) - flushes one page + * flush_tlb_range(vma, start, end) - flushes a range of pages + * flush_tlb_kernel_range(start, end) - flushes a range of kernel pages + */ -#endif +/* + * flush_tlb_mm goes together with ptep_set_wrprotect for the + * copy_page_range operation and flush_tlb_range is related to + * ptep_get_and_clear for change_protection. ptep_set_wrprotect and + * ptep_get_and_clear do not flush the TLBs directly if the mm has + * only one user. At the end of the update the flush_tlb_mm and + * flush_tlb_range functions need to do the flush. + */ +#define flush_tlb() do { } while (0) +#define flush_tlb_all() do { } while (0) +#define flush_tlb_mm(mm) __tlb_flush_mm_cond(mm) +#define flush_tlb_page(vma, addr) do { } while (0) +#define flush_tlb_range(vma, start, end) __tlb_flush_mm_cond(mm) +#define flush_tlb_kernel_range(start, end) __tlb_flush_mm(&init_mm) #endif /* _S390_TLBFLUSH_H */