diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 30278e9e5211..01a2992b5754 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -776,11 +776,11 @@ and is between 256 and 4096 characters. It is defined in the file hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot. hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages. - On x86 this option can be specified multiple times - interleaved with hugepages= to reserve huge pages - of different sizes. Valid pages sizes on x86-64 - are 2M (when the CPU supports "pse") and 1G (when the - CPU supports the "pdpe1gb" cpuinfo flag) + On x86-64 and powerpc, this option can be specified + multiple times interleaved with hugepages= to reserve + huge pages of different sizes. Valid pages sizes on + x86-64 are 2M (when the CPU supports "pse") and 1G + (when the CPU supports the "pdpe1gb" cpuinfo flag) Note that 1GB pages can only be allocated at boot time using hugepages= and not freed afterwards. default_hugepagesz= diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index ae4c717243a5..5ce5a4dcd008 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -103,7 +103,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M; int mmu_highuser_ssize = MMU_SEGSIZE_256M; u16 mmu_slb_size = 64; #ifdef CONFIG_HUGETLB_PAGE -int mmu_huge_psize = MMU_PAGE_16M; unsigned int HPAGE_SHIFT; #endif #ifdef CONFIG_PPC_64K_PAGES @@ -460,15 +459,15 @@ static void __init htab_init_page_sizes(void) /* Reserve 16G huge page memory sections for huge pages */ of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); -/* Init large page size. Currently, we pick 16M or 1M depending +/* Set default large page size. Currently, we pick 16M or 1M depending * on what is available */ if (mmu_psize_defs[MMU_PAGE_16M].shift) - set_huge_psize(MMU_PAGE_16M); + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; /* With 4k/4level pagetables, we can't (for now) cope with a * huge page size < PMD_SIZE */ else if (mmu_psize_defs[MMU_PAGE_1M].shift) - set_huge_psize(MMU_PAGE_1M); + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; #endif /* CONFIG_HUGETLB_PAGE */ } @@ -889,7 +888,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) #ifdef CONFIG_HUGETLB_PAGE /* Handle hugepage regions */ - if (HPAGE_SHIFT && psize == mmu_huge_psize) { + if (HPAGE_SHIFT && mmu_huge_psizes[psize]) { DBG_LOW(" -> huge page !\n"); return hash_huge_page(mm, access, ea, vsid, local, trap); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 19b1a9cec6d5..fb42c4dd3217 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -37,15 +37,30 @@ static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; static unsigned nr_gpages; -unsigned int hugepte_shift; -#define PTRS_PER_HUGEPTE (1 << hugepte_shift) -#define HUGEPTE_TABLE_SIZE (sizeof(pte_t) << hugepte_shift) +/* Array of valid huge page sizes - non-zero value(hugepte_shift) is + * stored for the huge page sizes that are valid. + */ +unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */ -#define HUGEPD_SHIFT (HPAGE_SHIFT + hugepte_shift) -#define HUGEPD_SIZE (1UL << HUGEPD_SHIFT) -#define HUGEPD_MASK (~(HUGEPD_SIZE-1)) +#define hugepte_shift mmu_huge_psizes +#define PTRS_PER_HUGEPTE(psize) (1 << hugepte_shift[psize]) +#define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize]) -#define huge_pgtable_cache (pgtable_cache[HUGEPTE_CACHE_NUM]) +#define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \ + + hugepte_shift[psize]) +#define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize)) +#define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1)) + +/* Subtract one from array size because we don't need a cache for 4K since + * is not a huge page size */ +#define huge_pgtable_cache(psize) (pgtable_cache[HUGEPTE_CACHE_NUM \ + + psize-1]) +#define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize]) + +static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = { + "unused_4K", "hugepte_cache_64K", "unused_64K_AP", + "hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G" +}; /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() * will choke on pointers to hugepte tables, which is handy for @@ -56,24 +71,49 @@ typedef struct { unsigned long pd; } hugepd_t; #define hugepd_none(hpd) ((hpd).pd == 0) +static inline int shift_to_mmu_psize(unsigned int shift) +{ + switch (shift) { +#ifndef CONFIG_PPC_64K_PAGES + case PAGE_SHIFT_64K: + return MMU_PAGE_64K; +#endif + case PAGE_SHIFT_16M: + return MMU_PAGE_16M; + case PAGE_SHIFT_16G: + return MMU_PAGE_16G; + } + return -1; +} + +static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) +{ + if (mmu_psize_defs[mmu_psize].shift) + return mmu_psize_defs[mmu_psize].shift; + BUG(); +} + static inline pte_t *hugepd_page(hugepd_t hpd) { BUG_ON(!(hpd.pd & HUGEPD_OK)); return (pte_t *)(hpd.pd & ~HUGEPD_OK); } -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr) +static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, + struct hstate *hstate) { - unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1)); + unsigned int shift = huge_page_shift(hstate); + int psize = shift_to_mmu_psize(shift); + unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1)); pte_t *dir = hugepd_page(*hpdp); return dir + idx; } static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, - unsigned long address) + unsigned long address, unsigned int psize) { - pte_t *new = kmem_cache_alloc(huge_pgtable_cache, + pte_t *new = kmem_cache_alloc(huge_pgtable_cache(psize), GFP_KERNEL|__GFP_REPEAT); if (! new) @@ -81,7 +121,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, spin_lock(&mm->page_table_lock); if (!hugepd_none(*hpdp)) - kmem_cache_free(huge_pgtable_cache, new); + kmem_cache_free(huge_pgtable_cache(psize), new); else hpdp->pd = (unsigned long)new | HUGEPD_OK; spin_unlock(&mm->page_table_lock); @@ -90,21 +130,22 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, /* Base page size affects how we walk hugetlb page tables */ #ifdef CONFIG_PPC_64K_PAGES -#define hpmd_offset(pud, addr) pmd_offset(pud, addr) -#define hpmd_alloc(mm, pud, addr) pmd_alloc(mm, pud, addr) +#define hpmd_offset(pud, addr, h) pmd_offset(pud, addr) +#define hpmd_alloc(mm, pud, addr, h) pmd_alloc(mm, pud, addr) #else static inline -pmd_t *hpmd_offset(pud_t *pud, unsigned long addr) +pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate) { - if (HPAGE_SHIFT == PAGE_SHIFT_64K) + if (huge_page_shift(hstate) == PAGE_SHIFT_64K) return pmd_offset(pud, addr); else return (pmd_t *) pud; } static inline -pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr) +pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr, + struct hstate *hstate) { - if (HPAGE_SHIFT == PAGE_SHIFT_64K) + if (huge_page_shift(hstate) == PAGE_SHIFT_64K) return pmd_alloc(mm, pud, addr); else return (pmd_t *) pud; @@ -128,8 +169,9 @@ void add_gpage(unsigned long addr, unsigned long page_size, } /* Moves the gigantic page addresses from the temporary list to the - * huge_boot_pages list. */ -int alloc_bootmem_huge_page(struct hstate *h) + * huge_boot_pages list. + */ +int alloc_bootmem_huge_page(struct hstate *hstate) { struct huge_bootmem_page *m; if (nr_gpages == 0) @@ -137,7 +179,7 @@ int alloc_bootmem_huge_page(struct hstate *h) m = phys_to_virt(gpage_freearray[--nr_gpages]); gpage_freearray[nr_gpages] = 0; list_add(&m->list, &huge_boot_pages); - m->hstate = h; + m->hstate = hstate; return 1; } @@ -149,17 +191,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pud_t *pu; pmd_t *pm; - BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); + unsigned int psize; + unsigned int shift; + unsigned long sz; + struct hstate *hstate; + psize = get_slice_psize(mm, addr); + shift = mmu_psize_to_shift(psize); + sz = ((1UL) << shift); + hstate = size_to_hstate(sz); - addr &= HPAGE_MASK; + addr &= hstate->mask; pg = pgd_offset(mm, addr); if (!pgd_none(*pg)) { pu = pud_offset(pg, addr); if (!pud_none(*pu)) { - pm = hpmd_offset(pu, addr); + pm = hpmd_offset(pu, addr, hstate); if (!pmd_none(*pm)) - return hugepte_offset((hugepd_t *)pm, addr); + return hugepte_offset((hugepd_t *)pm, addr, + hstate); } } @@ -173,16 +223,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pud_t *pu; pmd_t *pm; hugepd_t *hpdp = NULL; + struct hstate *hstate; + unsigned int psize; + hstate = size_to_hstate(sz); - BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); + psize = get_slice_psize(mm, addr); + BUG_ON(!mmu_huge_psizes[psize]); - addr &= HPAGE_MASK; + addr &= hstate->mask; pg = pgd_offset(mm, addr); pu = pud_alloc(mm, pg, addr); if (pu) { - pm = hpmd_alloc(mm, pu, addr); + pm = hpmd_alloc(mm, pu, addr, hstate); if (pm) hpdp = (hugepd_t *)pm; } @@ -190,10 +244,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (! hpdp) return NULL; - if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr)) + if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize)) return NULL; - return hugepte_offset(hpdp, addr); + return hugepte_offset(hpdp, addr, hstate); } int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) @@ -201,19 +255,22 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 0; } -static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp) +static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp, + unsigned int psize) { pte_t *hugepte = hugepd_page(*hpdp); hpdp->pd = 0; tlb->need_flush = 1; - pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM, + pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, + HUGEPTE_CACHE_NUM+psize-1, PGF_CACHENUM_MASK)); } static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long addr, unsigned long end, - unsigned long floor, unsigned long ceiling) + unsigned long floor, unsigned long ceiling, + unsigned int psize) { pmd_t *pmd; unsigned long next; @@ -225,7 +282,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, next = pmd_addr_end(addr, end); if (pmd_none(*pmd)) continue; - free_hugepte_range(tlb, (hugepd_t *)pmd); + free_hugepte_range(tlb, (hugepd_t *)pmd, psize); } while (pmd++, addr = next, addr != end); start &= PUD_MASK; @@ -251,6 +308,9 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, pud_t *pud; unsigned long next; unsigned long start; + unsigned int shift; + unsigned int psize = get_slice_psize(tlb->mm, addr); + shift = mmu_psize_to_shift(psize); start = addr; pud = pud_offset(pgd, addr); @@ -259,16 +319,18 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, #ifdef CONFIG_PPC_64K_PAGES if (pud_none_or_clear_bad(pud)) continue; - hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling, + psize); #else - if (HPAGE_SHIFT == PAGE_SHIFT_64K) { + if (shift == PAGE_SHIFT_64K) { if (pud_none_or_clear_bad(pud)) continue; - hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, + ceiling, psize); } else { if (pud_none(*pud)) continue; - free_hugepte_range(tlb, (hugepd_t *)pud); + free_hugepte_range(tlb, (hugepd_t *)pud, psize); } #endif } while (pud++, addr = next, addr != end); @@ -336,27 +398,29 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, * now has no other vmas using it, so can be freed, we don't * bother to round floor or end up - the tests don't need that. */ + unsigned int psize = get_slice_psize(tlb->mm, addr); - addr &= HUGEPD_MASK; + addr &= HUGEPD_MASK(psize); if (addr < floor) { - addr += HUGEPD_SIZE; + addr += HUGEPD_SIZE(psize); if (!addr) return; } if (ceiling) { - ceiling &= HUGEPD_MASK; + ceiling &= HUGEPD_MASK(psize); if (!ceiling) return; } if (end - 1 > ceiling - 1) - end -= HUGEPD_SIZE; + end -= HUGEPD_SIZE(psize); if (addr > end - 1) return; start = addr; pgd = pgd_offset(tlb->mm, addr); do { - BUG_ON(get_slice_psize(tlb->mm, addr) != mmu_huge_psize); + psize = get_slice_psize(tlb->mm, addr); + BUG_ON(!mmu_huge_psizes[psize]); next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; @@ -373,7 +437,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, * necessary anymore if we make hpte_need_flush() get the * page size from the slices */ - pte_update(mm, addr & HPAGE_MASK, ptep, ~0UL, 1); + unsigned int psize = get_slice_psize(mm, addr); + unsigned int shift = mmu_psize_to_shift(psize); + unsigned long sz = ((1UL) << shift); + struct hstate *hstate = size_to_hstate(sz); + pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1); } *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); } @@ -390,14 +458,19 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { pte_t *ptep; struct page *page; + unsigned int mmu_psize = get_slice_psize(mm, address); - if (get_slice_psize(mm, address) != mmu_huge_psize) + /* Verify it is a huge page else bail. */ + if (!mmu_huge_psizes[mmu_psize]) return ERR_PTR(-EINVAL); ptep = huge_pte_offset(mm, address); page = pte_page(*ptep); - if (page) - page += (address % HPAGE_SIZE) / PAGE_SIZE; + if (page) { + unsigned int shift = mmu_psize_to_shift(mmu_psize); + unsigned long sz = ((1UL) << shift); + page += (address % sz) / PAGE_SIZE; + } return page; } @@ -425,15 +498,16 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - return slice_get_unmapped_area(addr, len, flags, - mmu_huge_psize, 1, 0); + struct hstate *hstate = hstate_file(file); + int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); + return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); } /* * Called by asm hashtable.S for doing lazy icache flush */ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, - pte_t pte, int trap) + pte_t pte, int trap, unsigned long sz) { struct page *page; int i; @@ -446,7 +520,7 @@ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, /* page is dirty */ if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { if (trap == 0x400) { - for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) + for (i = 0; i < (sz / PAGE_SIZE); i++) __flush_dcache_icache(page_address(page+i)); set_bit(PG_arch_1, &page->flags); } else { @@ -462,11 +536,16 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, { pte_t *ptep; unsigned long old_pte, new_pte; - unsigned long va, rflags, pa; + unsigned long va, rflags, pa, sz; long slot; int err = 1; int ssize = user_segment_size(ea); + unsigned int mmu_psize; + int shift; + mmu_psize = get_slice_psize(mm, ea); + if (!mmu_huge_psizes[mmu_psize]) + goto out; ptep = huge_pte_offset(mm, ea); /* Search the Linux page table for a match with va */ @@ -509,30 +588,32 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, rflags = 0x2 | (!(new_pte & _PAGE_RW)); /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); + shift = mmu_psize_to_shift(mmu_psize); + sz = ((1UL) << shift); if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) /* No CPU has hugepages but lacks no execute, so we * don't need to worry about that case */ rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), - trap); + trap, sz); /* Check if pte already has an hpte (case 2) */ if (unlikely(old_pte & _PAGE_HASHPTE)) { /* There MIGHT be an HPTE for this pte */ unsigned long hash, slot; - hash = hpt_hash(va, HPAGE_SHIFT, ssize); + hash = hpt_hash(va, shift, ssize); if (old_pte & _PAGE_F_SECOND) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & _PAGE_F_GIX) >> 12; - if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, + if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize, ssize, local) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } if (likely(!(old_pte & _PAGE_HASHPTE))) { - unsigned long hash = hpt_hash(va, HPAGE_SHIFT, ssize); + unsigned long hash = hpt_hash(va, shift, ssize); unsigned long hpte_group; pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; @@ -553,7 +634,7 @@ repeat: /* Insert into the hash table, primary slot */ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, - mmu_huge_psize, ssize); + mmu_psize, ssize); /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { @@ -561,7 +642,7 @@ repeat: HPTES_PER_GROUP) & ~0x7UL; slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, HPTE_V_SECONDARY, - mmu_huge_psize, ssize); + mmu_psize, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * @@ -598,66 +679,50 @@ void set_huge_psize(int psize) (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT || mmu_psize_defs[psize].shift == PAGE_SHIFT_64K || mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) { - /* Return if huge page size is the same as the - * base page size. */ - if (mmu_psize_defs[psize].shift == PAGE_SHIFT) + /* Return if huge page size has already been setup or is the + * same as the base page size. */ + if (mmu_huge_psizes[psize] || + mmu_psize_defs[psize].shift == PAGE_SHIFT) return; + hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT); - HPAGE_SHIFT = mmu_psize_defs[psize].shift; - mmu_huge_psize = psize; - - switch (HPAGE_SHIFT) { + switch (mmu_psize_defs[psize].shift) { case PAGE_SHIFT_64K: /* We only allow 64k hpages with 4k base page, * which was checked above, and always put them * at the PMD */ - hugepte_shift = PMD_SHIFT; + hugepte_shift[psize] = PMD_SHIFT; break; case PAGE_SHIFT_16M: /* 16M pages can be at two different levels * of pagestables based on base page size */ if (PAGE_SHIFT == PAGE_SHIFT_64K) - hugepte_shift = PMD_SHIFT; + hugepte_shift[psize] = PMD_SHIFT; else /* 4k base page */ - hugepte_shift = PUD_SHIFT; + hugepte_shift[psize] = PUD_SHIFT; break; case PAGE_SHIFT_16G: /* 16G pages are always at PGD level */ - hugepte_shift = PGDIR_SHIFT; + hugepte_shift[psize] = PGDIR_SHIFT; break; } - hugepte_shift -= HPAGE_SHIFT; + hugepte_shift[psize] -= mmu_psize_defs[psize].shift; } else - HPAGE_SHIFT = 0; + hugepte_shift[psize] = 0; } static int __init hugepage_setup_sz(char *str) { unsigned long long size; - int mmu_psize = -1; + int mmu_psize; int shift; size = memparse(str, &str); shift = __ffs(size); - switch (shift) { -#ifndef CONFIG_PPC_64K_PAGES - case PAGE_SHIFT_64K: - mmu_psize = MMU_PAGE_64K; - break; -#endif - case PAGE_SHIFT_16M: - mmu_psize = MMU_PAGE_16M; - break; - case PAGE_SHIFT_16G: - mmu_psize = MMU_PAGE_16G; - break; - } - - if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift) { + mmu_psize = shift_to_mmu_psize(shift); + if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift) set_huge_psize(mmu_psize); - hugetlb_add_hstate(shift - PAGE_SHIFT); - } else printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size); @@ -672,16 +737,31 @@ static void zero_ctor(struct kmem_cache *cache, void *addr) static int __init hugetlbpage_init(void) { + unsigned int psize; + if (!cpu_has_feature(CPU_FTR_16M_PAGE)) return -ENODEV; + /* Add supported huge page sizes. Need to change HUGE_MAX_HSTATE + * and adjust PTE_NONCACHE_NUM if the number of supported huge page + * sizes changes. + */ + set_huge_psize(MMU_PAGE_16M); + set_huge_psize(MMU_PAGE_64K); + set_huge_psize(MMU_PAGE_16G); - huge_pgtable_cache = kmem_cache_create("hugepte_cache", - HUGEPTE_TABLE_SIZE, - HUGEPTE_TABLE_SIZE, - 0, - zero_ctor); - if (! huge_pgtable_cache) - panic("hugetlbpage_init(): could not create hugepte cache\n"); + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + if (mmu_huge_psizes[psize]) { + huge_pgtable_cache(psize) = kmem_cache_create( + HUGEPTE_CACHE_NAME(psize), + HUGEPTE_TABLE_SIZE(psize), + HUGEPTE_TABLE_SIZE(psize), + 0, + zero_ctor); + if (!huge_pgtable_cache(psize)) + panic("hugetlbpage_init(): could not create %s"\ + "\n", HUGEPTE_CACHE_NAME(psize)); + } + } return 0; } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 6ef63caca682..a41bc5aa2043 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -153,10 +153,10 @@ static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { }; #ifdef CONFIG_HUGETLB_PAGE -/* Hugepages need one extra cache, initialized in hugetlbpage.c. We - * can't put into the tables above, because HPAGE_SHIFT is not compile - * time constant. */ -struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1]; +/* Hugepages need an extra cache per hugepagesize, initialized in + * hugetlbpage.c. We can't put into the tables above, because HPAGE_SHIFT + * is not compile time constant. */ +struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT]; #else struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; #endif diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index a01b5c608ff9..409fcc7b63ce 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -147,7 +147,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, */ if (huge) { #ifdef CONFIG_HUGETLB_PAGE - psize = mmu_huge_psize; + psize = get_slice_psize(mm, addr);; #else BUG(); psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h index ca37c4af27b1..26f0d0ab27a5 100644 --- a/include/asm-powerpc/hugetlb.h +++ b/include/asm-powerpc/hugetlb.h @@ -24,9 +24,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { - if (len & ~HPAGE_MASK) + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) return -EINVAL; - if (addr & ~HPAGE_MASK) + if (addr & ~huge_page_mask(h)) return -EINVAL; return 0; } diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h index b61181aa7746..19c7a9403490 100644 --- a/include/asm-powerpc/mmu-hash64.h +++ b/include/asm-powerpc/mmu-hash64.h @@ -194,9 +194,9 @@ extern int mmu_ci_restrictions; #ifdef CONFIG_HUGETLB_PAGE /* - * The page size index of the huge pages for use by hugetlbfs + * The page size indexes of the huge pages for use by hugetlbfs */ -extern int mmu_huge_psize; +extern unsigned int mmu_huge_psizes[MMU_PAGE_COUNT]; #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/include/asm-powerpc/page_64.h b/include/asm-powerpc/page_64.h index 02fd80710e9d..043bfdfe4f73 100644 --- a/include/asm-powerpc/page_64.h +++ b/include/asm-powerpc/page_64.h @@ -90,6 +90,7 @@ extern unsigned int HPAGE_SHIFT; #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +#define HUGE_MAX_HSTATE 3 #endif /* __ASSEMBLY__ */ diff --git a/include/asm-powerpc/pgalloc-64.h b/include/asm-powerpc/pgalloc-64.h index 68980990f62a..812a1d8f35cb 100644 --- a/include/asm-powerpc/pgalloc-64.h +++ b/include/asm-powerpc/pgalloc-64.h @@ -22,7 +22,7 @@ extern struct kmem_cache *pgtable_cache[]; #define PUD_CACHE_NUM 1 #define PMD_CACHE_NUM 1 #define HUGEPTE_CACHE_NUM 2 -#define PTE_NONCACHE_NUM 3 /* from GFP rather than kmem_cache */ +#define PTE_NONCACHE_NUM 7 /* from GFP rather than kmem_cache */ static inline pgd_t *pgd_alloc(struct mm_struct *mm) { @@ -119,7 +119,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) __free_page(ptepage); } -#define PGF_CACHENUM_MASK 0x3 +#define PGF_CACHENUM_MASK 0x7 typedef struct pgtable_free { unsigned long val;