diff --git a/Documentation/features/vm/THP/arch-support.txt b/Documentation/features/vm/THP/arch-support.txt index df384e3e845f..523f8307b9cd 100644 --- a/Documentation/features/vm/THP/arch-support.txt +++ b/Documentation/features/vm/THP/arch-support.txt @@ -7,7 +7,7 @@ | arch |status| ----------------------- | alpha: | TODO | - | arc: | .. | + | arc: | ok | | arm: | ok | | arm64: | ok | | avr32: | .. | diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt index aaaa21db6226..3de5434c857c 100644 --- a/Documentation/features/vm/pte_special/arch-support.txt +++ b/Documentation/features/vm/pte_special/arch-support.txt @@ -7,7 +7,7 @@ | arch |status| ----------------------- | alpha: | TODO | - | arc: | TODO | + | arc: | ok | | arm: | ok | | arm64: | ok | | avr32: | TODO | diff --git a/MAINTAINERS b/MAINTAINERS index 77ed3a028625..6a0b9ca65407 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10121,6 +10121,7 @@ F: include/net/switchdev.h SYNOPSYS ARC ARCHITECTURE M: Vineet Gupta +L: linux-snps-arc@lists.infraded.org S: Supported F: arch/arc/ F: Documentation/devicetree/bindings/arc/* diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 78c0621d5819..2c2ac3f3ff80 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -76,6 +76,10 @@ config STACKTRACE_SUPPORT config HAVE_LATENCYTOP_SUPPORT def_bool y +config HAVE_ARCH_TRANSPARENT_HUGEPAGE + def_bool y + depends on ARC_MMU_V4 + source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -190,6 +194,16 @@ config NR_CPUS range 2 4096 default "4" +config ARC_SMP_HALT_ON_RESET + bool "Enable Halt-on-reset boot mode" + default y if ARC_UBOOT_SUPPORT + help + In SMP configuration cores can be configured as Halt-on-reset + or they could all start at same time. For Halt-on-reset, non + masters are parked until Master kicks them so they can start of + at designated entry point. For other case, all jump to common + entry point and spin wait for Master's signal. + endif #SMP menuconfig ARC_CACHE @@ -278,6 +292,8 @@ choice default ARC_MMU_V2 if ARC_CPU_750D default ARC_MMU_V4 if ARC_CPU_HS +if ISA_ARCOMPACT + config ARC_MMU_V1 bool "MMU v1" help @@ -297,6 +313,8 @@ config ARC_MMU_V3 Variable Page size (1k-16k), var JTLB size 128 x (2 or 4) Shared Address Spaces (SASID) +endif + config ARC_MMU_V4 bool "MMU v4" depends on ISA_ARCV2 @@ -428,6 +446,28 @@ config LINUX_LINK_BASE Linux needs to be scooted a bit. If you don't know what the above means, leave this setting alone. +config HIGHMEM + bool "High Memory Support" + help + With ARC 2G:2G address split, only upper 2G is directly addressable by + kernel. Enable this to potentially allow access to rest of 2G and PAE + in future + +config ARC_HAS_PAE40 + bool "Support for the 40-bit Physical Address Extension" + default n + depends on ISA_ARCV2 + select HIGHMEM + help + Enable access to physical memory beyond 4G, only supported on + ARC cores with 40 bit Physical Addressing support + +config ARCH_PHYS_ADDR_T_64BIT + def_bool ARC_HAS_PAE40 + +config ARCH_DMA_ADDR_T_64BIT + bool + config ARC_CURR_IN_REG bool "Dedicate Register r25 for current_task pointer" default y diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi index a5e2726a067e..420dcfde289f 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -95,6 +95,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 846481f37eef..f90fadf7f94e 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -98,6 +98,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 2f0b33257db2..06a9f294a2e6 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -121,6 +121,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts index 911f069e0540..b0eb0e7fe21d 100644 --- a/arch/arc/boot/dts/nsim_hs.dts +++ b/arch/arc/boot/dts/nsim_hs.dts @@ -11,8 +11,16 @@ / { compatible = "snps,nsim_hs"; + #address-cells = <2>; + #size-cells = <2>; interrupt-parent = <&core_intc>; + memory { + device_type = "memory"; + reg = <0x0 0x80000000 0x0 0x40000000 /* 1 GB low mem */ + 0x1 0x00000000 0x0 0x40000000>; /* 1 GB highmem */ + }; + chosen { bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; }; @@ -26,8 +34,8 @@ #address-cells = <1>; #size-cells = <1>; - /* child and parent address space 1:1 mapped */ - ranges; + /* only perip space at end of low mem accessible */ + ranges = <0x80000000 0x0 0x80000000 0x80000000>; core_intc: core-interrupt-controller { compatible = "snps,archs-intc"; diff --git a/arch/arc/boot/dts/skeleton.dtsi b/arch/arc/boot/dts/skeleton.dtsi index a870bdd5e404..296d371a335c 100644 --- a/arch/arc/boot/dts/skeleton.dtsi +++ b/arch/arc/boot/dts/skeleton.dtsi @@ -32,6 +32,6 @@ memory { device_type = "memory"; - reg = <0x00000000 0x10000000>; /* 256M */ + reg = <0x80000000 0x10000000>; /* 256M */ }; }; diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi index 9393fd902f0d..84226bd48baf 100644 --- a/arch/arc/boot/dts/vdk_axc003.dtsi +++ b/arch/arc/boot/dts/vdk_axc003.dtsi @@ -56,6 +56,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi index 9bee8ed09eb0..31f0fb5fc91d 100644 --- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi +++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi @@ -71,6 +71,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index d8023bc8d1ad..7fac7d85ed6a 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -120,7 +120,7 @@ /* gcc builtin sr needs reg param to be long immediate */ #define write_aux_reg(reg_immed, val) \ - __builtin_arc_sr((unsigned int)val, reg_immed) + __builtin_arc_sr((unsigned int)(val), reg_immed) #else @@ -327,8 +327,8 @@ struct bcr_generic { */ struct cpuinfo_arc_mmu { - unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, u_dtlb:6, u_itlb:6; - unsigned int num_tlb:16, sets:12, ways:4; + unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:10, sasid:1, pae:1; + unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8; }; struct cpuinfo_arc_cache { diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index e23ea6e7633a..abf06e81c929 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -65,6 +65,7 @@ extern int ioc_exists; #if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4) #define ARC_REG_IC_PTAG 0x1E #endif +#define ARC_REG_IC_PTAG_HI 0x1F /* Bit val in IC_CTRL */ #define IC_CTRL_CACHE_DISABLE 0x1 @@ -77,6 +78,7 @@ extern int ioc_exists; #define ARC_REG_DC_FLSH 0x4B #define ARC_REG_DC_FLDL 0x4C #define ARC_REG_DC_PTAG 0x5C +#define ARC_REG_DC_PTAG_HI 0x5F /* Bit val in DC_CTRL */ #define DC_CTRL_INV_MODE_FLUSH 0x40 diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index 0992d3dbcc65..fbe3587c4f36 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -31,10 +31,10 @@ void flush_cache_all(void); -void flush_icache_range(unsigned long start, unsigned long end); -void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len); -void __inv_icache_page(unsigned long paddr, unsigned long vaddr); -void __flush_dcache_page(unsigned long paddr, unsigned long vaddr); +void flush_icache_range(unsigned long kstart, unsigned long kend); +void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len); +void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr); +void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 415443c2a8c4..1aff3be91075 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -110,13 +110,12 @@ .macro FAKE_RET_FROM_EXCPN - ld r9, [sp, PT_status32] - bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK) - bset r9, r9, STATUS_L_BIT - sr r9, [erstatus] - mov r9, 55f - sr r9, [eret] - + lr r9, [status32] + bclr r9, r9, STATUS_AE_BIT + or r9, r9, (STATUS_E1_MASK|STATUS_E2_MASK) + sr r9, [erstatus] + mov r9, 55f + sr r9, [eret] rtie 55: .endm diff --git a/arch/arc/include/asm/highmem.h b/arch/arc/include/asm/highmem.h new file mode 100644 index 000000000000..b1585c96324a --- /dev/null +++ b/arch/arc/include/asm/highmem.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef _ASM_HIGHMEM_H +#define _ASM_HIGHMEM_H + +#ifdef CONFIG_HIGHMEM + +#include +#include + +/* start after vmalloc area */ +#define FIXMAP_BASE (PAGE_OFFSET - FIXMAP_SIZE - PKMAP_SIZE) +#define FIXMAP_SIZE PGDIR_SIZE /* only 1 PGD worth */ +#define KM_TYPE_NR ((FIXMAP_SIZE >> PAGE_SHIFT)/NR_CPUS) +#define FIXMAP_ADDR(nr) (FIXMAP_BASE + ((nr) << PAGE_SHIFT)) + +/* start after fixmap area */ +#define PKMAP_BASE (FIXMAP_BASE + FIXMAP_SIZE) +#define PKMAP_SIZE PGDIR_SIZE +#define LAST_PKMAP (PKMAP_SIZE >> PAGE_SHIFT) +#define LAST_PKMAP_MASK (LAST_PKMAP - 1) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) +#define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT) + +#define kmap_prot PAGE_KERNEL + + +#include + +extern void *kmap(struct page *page); +extern void *kmap_high(struct page *page); +extern void *kmap_atomic(struct page *page); +extern void __kunmap_atomic(void *kvaddr); +extern void kunmap_high(struct page *page); + +extern void kmap_init(void); + +static inline void flush_cache_kmaps(void) +{ + flush_cache_all(); +} + +static inline void kunmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} + + +#endif + +#endif diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h new file mode 100644 index 000000000000..c5094de86403 --- /dev/null +++ b/arch/arc/include/asm/hugepage.h @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + +#ifndef _ASM_ARC_HUGEPAGE_H +#define _ASM_ARC_HUGEPAGE_H + +#include +#include + +static inline pte_t pmd_pte(pmd_t pmd) +{ + return __pte(pmd_val(pmd)); +} + +static inline pmd_t pte_pmd(pte_t pte) +{ + return __pmd(pte_val(pte)); +} + +#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) +#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) +#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) +#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) +#define pmd_mknotpresent(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd))) +#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd))) +#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) + +#define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) +#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) +#define pmd_special(pmd) pte_special(pmd_pte(pmd)) + +#define mk_pmd(page, prot) pte_pmd(mk_pte(page, prot)) + +#define pmd_trans_huge(pmd) (pmd_val(pmd) & _PAGE_HW_SZ) +#define pmd_trans_splitting(pmd) (pmd_trans_huge(pmd) && pmd_special(pmd)) + +#define pfn_pmd(pfn, prot) (__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + /* + * open-coded pte_modify() with additional retaining of HW_SZ bit + * so that pmd_trans_huge() remains true for this PMD + */ + return __pmd((pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HW_SZ)) | pgprot_val(newprot)); +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} + +extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd); + +#define has_transparent_hugepage() 1 + +/* Generic variants assume pgtable_t is struct page *, hence need for these */ +#define __HAVE_ARCH_PGTABLE_DEPOSIT +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); + +#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE +extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); + +#endif diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index bc5103637326..4fd7d62a6e30 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -16,6 +16,7 @@ #ifdef CONFIG_ISA_ARCOMPACT #define TIMER0_IRQ 3 #define TIMER1_IRQ 4 +#define IPI_IRQ (NR_CPU_IRQS-1) /* dummy to enable SMP build for up hardware */ #else #define TIMER0_IRQ 16 #define TIMER1_IRQ 17 diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index aa805575c320..d8c608174617 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -23,11 +23,13 @@ #define STATUS_E2_BIT 2 /* Int 2 enable */ #define STATUS_A1_BIT 3 /* Int 1 active */ #define STATUS_A2_BIT 4 /* Int 2 active */ +#define STATUS_AE_BIT 5 /* Exception active */ #define STATUS_E1_MASK (1<> PAGE_SHIFT) #define pfn_valid(pfn) (((pfn) - ARCH_PFN_OFFSET) < max_mapnr) diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 81208bfd9dcb..86ed671286df 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -49,7 +49,7 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep) static inline int __get_order_pgd(void) { - return get_order(PTRS_PER_PGD * 4); + return get_order(PTRS_PER_PGD * sizeof(pgd_t)); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) @@ -87,7 +87,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline int __get_order_pte(void) { - return get_order(PTRS_PER_PTE * 4); + return get_order(PTRS_PER_PTE * sizeof(pte_t)); } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, @@ -107,10 +107,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) pgtable_t pte_pg; struct page *page; - pte_pg = __get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); + pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); if (!pte_pg) return 0; - memzero((void *)pte_pg, PTRS_PER_PTE * 4); + memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t)); page = virt_to_page(pte_pg); if (!pgtable_page_ctor(page)) { __free_page(page); @@ -128,12 +128,12 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static inline void pte_free(struct mm_struct *mm, pgtable_t ptep) { pgtable_page_dtor(virt_to_page(ptep)); - free_pages(ptep, __get_order_pte()); + free_pages((unsigned long)ptep, __get_order_pte()); } #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) #define check_pgt_cache() do { } while (0) -#define pmd_pgtable(pmd) pmd_page_vaddr(pmd) +#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) #endif /* _ASM_ARC_PGALLOC_H */ diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 1281718802f7..57af2f05ae84 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -38,6 +38,7 @@ #include #include #include +#include /************************************************************************** * Page Table Flags @@ -60,7 +61,8 @@ #define _PAGE_EXECUTE (1<<3) /* Page has user execute perm (H) */ #define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */ #define _PAGE_READ (1<<5) /* Page has user read perm (H) */ -#define _PAGE_MODIFIED (1<<6) /* Page modified (dirty) (S) */ +#define _PAGE_DIRTY (1<<6) /* Page modified (dirty) (S) */ +#define _PAGE_SPECIAL (1<<7) #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ #define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */ @@ -71,7 +73,8 @@ #define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */ #define _PAGE_READ (1<<3) /* Page has user read perm (H) */ #define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */ -#define _PAGE_MODIFIED (1<<5) /* Page modified (dirty) (S) */ +#define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */ +#define _PAGE_SPECIAL (1<<6) #if (CONFIG_ARC_MMU_VER >= 4) #define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */ @@ -81,32 +84,33 @@ #define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */ #if (CONFIG_ARC_MMU_VER >= 4) -#define _PAGE_SZ (1<<10) /* Page Size indicator (H) */ +#define _PAGE_HW_SZ (1<<10) /* Page Size indicator (H): 0 normal, 1 super */ #endif #define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr usable for shared TLB entries (H) */ + +#define _PAGE_UNUSED_BIT (1<<12) #endif /* vmalloc permissions */ #define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \ _PAGE_GLOBAL | _PAGE_PRESENT) -#ifdef CONFIG_ARC_CACHE_PAGES -#define _PAGE_DEF_CACHEABLE _PAGE_CACHEABLE -#else -#define _PAGE_DEF_CACHEABLE (0) +#ifndef CONFIG_ARC_CACHE_PAGES +#undef _PAGE_CACHEABLE +#define _PAGE_CACHEABLE 0 #endif -/* Helper for every "user" page - * -kernel can R/W/X - * -by default cached, unless config otherwise - * -present in memory - */ -#define ___DEF (_PAGE_PRESENT | _PAGE_DEF_CACHEABLE) +#ifndef _PAGE_HW_SZ +#define _PAGE_HW_SZ 0 +#endif + +/* Defaults for every user page */ +#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_MODIFIED) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) /* More Abbrevaited helpers */ #define PAGE_U_NONE __pgprot(___DEF) @@ -122,15 +126,20 @@ * user vaddr space - visible in all addr spaces, but kernel mode only * Thus Global, all-kernel-access, no-user-access, cached */ -#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_DEF_CACHEABLE) +#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE) /* ioremap */ #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS) /* Masks for actual TLB "PD"s */ -#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT) +#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) + +#ifdef CONFIG_ARC_HAS_PAE40 +#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE) +#else #define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE) +#endif /************************************************************************** * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) @@ -191,26 +200,22 @@ /* Optimal Sizing of Pg Tbl - based on MMU page size */ #if defined(CONFIG_ARC_PAGE_SIZE_8K) -#define BITS_FOR_PTE 8 +#define BITS_FOR_PTE 8 /* 11:8:13 */ #elif defined(CONFIG_ARC_PAGE_SIZE_16K) -#define BITS_FOR_PTE 8 +#define BITS_FOR_PTE 8 /* 10:8:14 */ #elif defined(CONFIG_ARC_PAGE_SIZE_4K) -#define BITS_FOR_PTE 9 +#define BITS_FOR_PTE 9 /* 11:9:12 */ #endif #define BITS_FOR_PGD (32 - BITS_FOR_PTE - BITS_IN_PAGE) -#define PGDIR_SHIFT (BITS_FOR_PTE + BITS_IN_PAGE) +#define PGDIR_SHIFT (32 - BITS_FOR_PGD) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) /* vaddr span, not PDG sz */ #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#ifdef __ASSEMBLY__ -#define PTRS_PER_PTE (1 << BITS_FOR_PTE) -#define PTRS_PER_PGD (1 << BITS_FOR_PGD) -#else -#define PTRS_PER_PTE (1UL << BITS_FOR_PTE) -#define PTRS_PER_PGD (1UL << BITS_FOR_PGD) -#endif +#define PTRS_PER_PTE _BITUL(BITS_FOR_PTE) +#define PTRS_PER_PGD _BITUL(BITS_FOR_PGD) + /* * Number of entries a user land program use. * TASK_SIZE is the maximum vaddr that can be used by a userland program. @@ -270,15 +275,10 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) (unsigned long)(((pte_val(x) - CONFIG_LINUX_LINK_BASE) >> \ PAGE_SHIFT))) -#define mk_pte(page, pgprot) \ -({ \ - pte_t pte; \ - pte_val(pte) = __pa(page_address(page)) + pgprot_val(pgprot); \ - pte; \ -}) - +#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) #define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) -#define pfn_pte(pfn, prot) (__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define pfn_pte(pfn, prot) (__pte(((pte_t)(pfn) << PAGE_SHIFT) | \ + pgprot_val(prot))) #define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) /* @@ -295,23 +295,26 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) /* Zoo of pte_xxx function */ #define pte_read(pte) (pte_val(pte) & _PAGE_READ) #define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) -#define pte_dirty(pte) (pte_val(pte) & _PAGE_MODIFIED) +#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) #define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) -#define pte_special(pte) (0) +#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL) #define PTE_BIT_FUNC(fn, op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } +PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT)); PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE)); PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE)); -PTE_BIT_FUNC(mkclean, &= ~(_PAGE_MODIFIED)); -PTE_BIT_FUNC(mkdirty, |= (_PAGE_MODIFIED)); +PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY)); +PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY)); PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED)); PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED)); PTE_BIT_FUNC(exprotect, &= ~(_PAGE_EXECUTE)); PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE)); +PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); +PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } +#define __HAVE_ARCH_PTE_SPECIAL static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { @@ -357,7 +360,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, #define pgd_offset_fast(mm, addr) pgd_offset(mm, addr) #endif -extern void paging_init(void); extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); @@ -383,6 +385,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, * remap a physical page `pfn' of size `size' with page protection `prot' * into virtual address `from' */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#include +#endif + #include /* to cope with aliasing VIPT cache */ diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index ee682d8e0213..44545354e9e8 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -114,7 +114,12 @@ extern unsigned int get_wchan(struct task_struct *p); * ----------------------------------------------------------------------------- */ #define VMALLOC_START 0x70000000 -#define VMALLOC_SIZE (PAGE_OFFSET - VMALLOC_START) + +/* + * 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter + * See asm/highmem.h for details + */ +#define VMALLOC_SIZE (PAGE_OFFSET - VMALLOC_START - PGDIR_SIZE * 4) #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) #define USER_KERNEL_GUTTER 0x10000000 diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index 6e3ef5ba4f74..307846691be6 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -33,4 +33,11 @@ extern int root_mountflags, end_mem; void setup_processor(void); void __init setup_arch_memory(void); +/* Helpers used in arc_*_mumbojumbo routines */ +#define IS_AVAIL1(v, s) ((v) ? s : "") +#define IS_DISABLED_RUN(v) ((v) ? "" : "(disabled) ") +#define IS_USED_RUN(v) ((v) ? "" : "(not used) ") +#define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) +#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) + #endif /* __ASMARC_SETUP_H */ diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 3845b9e94f69..133c867d15af 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -45,12 +45,19 @@ extern int smp_ipi_irq_setup(int cpu, int irq); * struct plat_smp_ops - SMP callbacks provided by platform to ARC SMP * * @info: SoC SMP specific info for /proc/cpuinfo etc + * @init_early_smp: A SMP specific h/w block can init itself + * Could be common across platforms so not covered by + * mach_desc->init_early() + * @init_irq_cpu: Called for each core so SMP h/w block driver can do + * any needed setup per cpu (e.g. IPI request) * @cpu_kick: For Master to kickstart a cpu (optionally at a PC) * @ipi_send: To send IPI to a @cpu * @ips_clear: To clear IPI received at @irq */ struct plat_smp_ops { const char *info; + void (*init_early_smp)(void); + void (*init_irq_cpu)(int cpu); void (*cpu_kick)(int cpu, unsigned long pc); void (*ipi_send)(int cpu); void (*ipi_clear)(int irq); diff --git a/arch/arc/include/asm/tlbflush.h b/arch/arc/include/asm/tlbflush.h index 71c7b2e4b874..1fe9c8c80280 100644 --- a/arch/arc/include/asm/tlbflush.h +++ b/arch/arc/include/asm/tlbflush.h @@ -17,6 +17,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page); void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); #ifndef CONFIG_SMP #define flush_tlb_range(vma, s, e) local_flush_tlb_range(vma, s, e) @@ -24,6 +26,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, #define flush_tlb_kernel_range(s, e) local_flush_tlb_kernel_range(s, e) #define flush_tlb_all() local_flush_tlb_all() #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) +#define flush_pmd_tlb_range(vma, s, e) local_flush_pmd_tlb_range(vma, s, e) #else extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -31,5 +34,7 @@ extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); extern void flush_tlb_all(void); extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); + #endif /* CONFIG_SMP */ #endif diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h index 9d129a2a1351..059aff38f10a 100644 --- a/arch/arc/include/uapi/asm/page.h +++ b/arch/arc/include/uapi/asm/page.h @@ -9,6 +9,8 @@ #ifndef _UAPI__ASM_ARC_PAGE_H #define _UAPI__ASM_ARC_PAGE_H +#include + /* PAGE_SHIFT determines the page size */ #if defined(CONFIG_ARC_PAGE_SIZE_16K) #define PAGE_SHIFT 14 @@ -25,13 +27,8 @@ #define PAGE_SHIFT 13 #endif -#ifdef __ASSEMBLY__ -#define PAGE_SIZE (1 << PAGE_SHIFT) -#define PAGE_OFFSET (0x80000000) -#else -#define PAGE_SIZE (1UL << PAGE_SHIFT) /* Default 8K */ -#define PAGE_OFFSET (0x80000000UL) /* Kernel starts at 2G onwards */ -#endif +#define PAGE_SIZE _BITUL(PAGE_SHIFT) /* Default 8K */ +#define PAGE_OFFSET _AC(0x80000000, UL) /* Kernel starts at 2G onwrds */ #define PAGE_MASK (~(PAGE_SIZE-1)) diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index 8fa76567e402..445e63a10754 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -24,7 +24,7 @@ .align 4 # Initial 16 slots are Exception Vectors -VECTOR stext ; Restart Vector (jump to entry point) +VECTOR res_service ; Reset Vector VECTOR mem_service ; Mem exception VECTOR instr_service ; Instrn Error VECTOR EV_MachineCheck ; Fatal Machine check diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S index 15d457b4403a..59f52035b4ea 100644 --- a/arch/arc/kernel/entry-compact.S +++ b/arch/arc/kernel/entry-compact.S @@ -86,7 +86,7 @@ */ ; ********* Critical System Events ********************** -VECTOR res_service ; 0x0, Restart Vector (0x0) +VECTOR res_service ; 0x0, Reset Vector (0x0) VECTOR mem_service ; 0x8, Mem exception (0x1) VECTOR instr_service ; 0x10, Instrn Error (0x2) @@ -155,13 +155,9 @@ int2_saved_reg: ; --------------------------------------------- .section .text, "ax",@progbits -res_service: ; processor restart - flag 0x1 ; not implemented - nop - nop -reserved: ; processor restart - rtie ; jump to processor initializations +reserved: + flag 1 ; Unexpected event, halt ;##################### Interrupt Handling ############################## @@ -175,12 +171,25 @@ ENTRY(handle_interrupt_level2) ;------------------------------------------------------ ; if L2 IRQ interrupted a L1 ISR, disable preemption + ; + ; This is to avoid a potential L1-L2-L1 scenario + ; -L1 IRQ taken + ; -L2 interrupts L1 (before L1 ISR could run) + ; -preemption off IRQ, user task in syscall picked to run + ; -RTIE to userspace + ; Returns from L2 context fine + ; But both L1 and L2 re-enabled, so another L1 can be taken + ; while prev L1 is still unserviced + ; ;------------------------------------------------------ + ; L2 interrupting L1 implies both L2 and L1 active + ; However both A2 and A1 are NOT set in STATUS32, thus + ; need to check STATUS32_L2 to determine if L1 was active + ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal - ; A1 is set in status32_l2 ; bump thread_info->preempt_count (Disable preemption) GET_CURR_THR_INFO_FROM_SP r10 ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] @@ -320,11 +329,10 @@ END(call_do_page_fault) ; Note that we use realtime STATUS32 (not pt_regs->status32) to ; decide that. - ; if Returning from Exception - btst r10, STATUS_AE_BIT - bnz .Lexcep_ret + and.f 0, r10, (STATUS_A1_MASK|STATUS_A2_MASK) + bz .Lexcep_or_pure_K_ret - ; Not Exception so maybe Interrupts (Level 1 or 2) + ; Returning from Interrupts (Level 1 or 2) #ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS @@ -365,8 +373,7 @@ END(call_do_page_fault) st r9, [r10, THREAD_INFO_PREEMPT_COUNT] 149: - ;return from level 2 - INTERRUPT_EPILOGUE 2 + INTERRUPT_EPILOGUE 2 ; return from level 2 interrupt debug_marker_l2: rtie @@ -374,15 +381,11 @@ not_level2_interrupt: #endif - bbit0 r10, STATUS_A1_BIT, .Lpure_k_mode_ret - - ;return from level 1 - INTERRUPT_EPILOGUE 1 + INTERRUPT_EPILOGUE 1 ; return from level 1 interrupt debug_marker_l1: rtie -.Lexcep_ret: -.Lpure_k_mode_ret: +.Lexcep_or_pure_K_ret: ;this case is for syscalls or Exceptions or pure kernel mode diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 812f95e6ae69..689dd867fdff 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -50,28 +50,37 @@ .endm .section .init.text, "ax",@progbits - .type stext, @function - .globl stext -stext: - ;------------------------------------------------------------------- - ; Don't clobber r0-r2 yet. It might have bootloader provided info - ;------------------------------------------------------------------- + +;---------------------------------------------------------------- +; Default Reset Handler (jumped into from Reset vector) +; - Don't clobber r0,r1,r2 as they might have u-boot provided args +; - Platforms can override this weak version if needed +;---------------------------------------------------------------- +WEAK(res_service) + j stext +END(res_service) + +;---------------------------------------------------------------- +; Kernel Entry point +;---------------------------------------------------------------- +ENTRY(stext) CPU_EARLY_SETUP #ifdef CONFIG_SMP - ; Ensure Boot (Master) proceeds. Others wait in platform dependent way - ; IDENTITY Reg [ 3 2 1 0 ] - ; (cpu-id) ^^^ => Zero for UP ARC700 - ; => #Core-ID if SMP (Master 0) - ; Note that non-boot CPUs might not land here if halt-on-reset and - ; instead breath life from @first_lines_of_secondary, but we still - ; need to make sure only boot cpu takes this path. GET_CPU_ID r5 cmp r5, 0 - mov.ne r0, r5 - jne arc_platform_smp_wait_to_boot + mov.nz r0, r5 +#ifdef CONFIG_ARC_SMP_HALT_ON_RESET + ; Non-Master can proceed as system would be booted sufficiently + jnz first_lines_of_secondary +#else + ; Non-Masters wait for Master to boot enough and bring them up + jnz arc_platform_smp_wait_to_boot #endif + ; Master falls thru +#endif + ; Clear BSS before updating any globals ; XXX: use ZOL here mov r5, __bss_start @@ -102,18 +111,14 @@ stext: GET_TSK_STACK_BASE r9, sp ; r9 = tsk, sp = stack base(output) j start_kernel ; "C" entry point +END(stext) #ifdef CONFIG_SMP ;---------------------------------------------------------------- ; First lines of code run by secondary before jumping to 'C' ;---------------------------------------------------------------- .section .text, "ax",@progbits - .type first_lines_of_secondary, @function - .globl first_lines_of_secondary - -first_lines_of_secondary: - - CPU_EARLY_SETUP +ENTRY(first_lines_of_secondary) ; setup per-cpu idle task as "current" on this CPU ld r0, [@secondary_idle_tsk] @@ -126,5 +131,5 @@ first_lines_of_secondary: GET_TSK_STACK_BASE r0, sp j start_kernel_secondary - +END(first_lines_of_secondary) #endif diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index 039fac30b5c1..06bcedf19b62 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -79,17 +79,16 @@ static struct irq_chip onchip_intc = { static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { - /* - * XXX: the IPI IRQ needs to be handled like TIMER too. However ARC core - * code doesn't own it (like TIMER0). ISS IDU / ezchip define it - * in platform header which can't be included here as it goes - * against multi-platform image philisophy - */ - if (irq == TIMER0_IRQ) + switch (irq) { + case TIMER0_IRQ: +#ifdef CONFIG_SMP + case IPI_IRQ: +#endif irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq); - else + break; + default: irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq); - + } return 0; } @@ -148,78 +147,15 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ); void arch_local_irq_enable(void) { - unsigned long flags = arch_local_save_flags(); - /* Allow both L1 and L2 at the onset */ - flags |= (STATUS_E1_MASK | STATUS_E2_MASK); - - /* Called from hard ISR (between irq_enter and irq_exit) */ - if (in_irq()) { - - /* If in L2 ISR, don't re-enable any further IRQs as this can - * cause IRQ priorities to get upside down. e.g. it could allow - * L1 be taken while in L2 hard ISR which is wrong not only in - * theory, it can also cause the dreaded L1-L2-L1 scenario - */ - if (flags & STATUS_A2_MASK) - flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); - - /* Even if in L1 ISR, allowe Higher prio L2 IRQs */ - else if (flags & STATUS_A1_MASK) - flags &= ~(STATUS_E1_MASK); - } - - /* called from soft IRQ, ideally we want to re-enable all levels */ - - else if (in_softirq()) { - - /* However if this is case of L1 interrupted by L2, - * re-enabling both may cause whaco L1-L2-L1 scenario - * because ARC700 allows level 1 to interrupt an active L2 ISR - * Thus we disable both - * However some code, executing in soft ISR wants some IRQs - * to be enabled so we re-enable L2 only - * - * How do we determine L1 intr by L2 - * -A2 is set (means in L2 ISR) - * -E1 is set in this ISR's pt_regs->status32 which is - * saved copy of status32_l2 when l2 ISR happened - */ - struct pt_regs *pt = get_irq_regs(); - - if ((flags & STATUS_A2_MASK) && pt && - (pt->status32 & STATUS_A1_MASK)) { - /*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */ - flags &= ~(STATUS_E1_MASK); - } - } + if (flags & STATUS_A2_MASK) + flags |= STATUS_E2_MASK; + else if (flags & STATUS_A1_MASK) + flags |= STATUS_E1_MASK; arch_local_irq_restore(flags); } -#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */ - -/* - * Simpler version for only 1 level of interrupt - * Here we only Worry about Level 1 Bits - */ -void arch_local_irq_enable(void) -{ - unsigned long flags; - - /* - * ARC IDE Drivers tries to re-enable interrupts from hard-isr - * context which is simply wrong - */ - if (in_irq()) { - WARN_ONCE(1, "IRQ enabled from hard-isr"); - return; - } - - flags = arch_local_save_flags(); - flags |= (STATUS_E1_MASK | STATUS_E2_MASK); - arch_local_irq_restore(flags); -} -#endif EXPORT_SYMBOL(arch_local_irq_enable); +#endif diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 2989a7bcf8a8..2ee226546c6a 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -10,6 +10,7 @@ #include #include #include +#include /* * Late Interrupt system init called from start_kernel for Boot CPU only @@ -19,17 +20,20 @@ */ void __init init_IRQ(void) { - /* Any external intc can be setup here */ - if (machine_desc->init_irq) - machine_desc->init_irq(); - - /* process the entire interrupt tree in one go */ + /* + * process the entire interrupt tree in one go + * Any external intc will be setup provided DT chains them + * properly + */ irqchip_init(); #ifdef CONFIG_SMP - /* Master CPU can initialize it's side of IPI */ - if (machine_desc->init_smp) - machine_desc->init_smp(smp_processor_id()); + /* a SMP H/w block could do IPI IRQ request here */ + if (plat_smp_ops.init_irq_cpu) + plat_smp_ops.init_irq_cpu(smp_processor_id()); + + if (machine_desc->init_cpu_smp) + machine_desc->init_cpu_smp(smp_processor_id()); #endif } diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 4ffd1855f1bd..74a9b074ac3e 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -12,20 +12,14 @@ #include #include #include +#include static char smp_cpuinfo_buf[128]; static int idu_detected; static DEFINE_RAW_SPINLOCK(mcip_lock); -/* - * Any SMP specific init any CPU does when it comes up. - * Here we setup the CPU to enable Inter-Processor-Interrupts - * Called for each CPU - * -Master : init_IRQ() - * -Other(s) : start_kernel_secondary() - */ -void mcip_init_smp(unsigned int cpu) +static void mcip_setup_per_cpu(int cpu) { smp_ipi_irq_setup(cpu, IPI_IRQ); } @@ -96,34 +90,8 @@ static void mcip_ipi_clear(int irq) #endif } -volatile int wake_flag; - -static void mcip_wakeup_cpu(int cpu, unsigned long pc) +static void mcip_probe_n_setup(void) { - BUG_ON(cpu == 0); - wake_flag = cpu; -} - -void arc_platform_smp_wait_to_boot(int cpu) -{ - while (wake_flag != cpu) - ; - - wake_flag = 0; - __asm__ __volatile__("j @first_lines_of_secondary \n"); -} - -struct plat_smp_ops plat_smp_ops = { - .info = smp_cpuinfo_buf, - .cpu_kick = mcip_wakeup_cpu, - .ipi_send = mcip_ipi_send, - .ipi_clear = mcip_ipi_clear, -}; - -void mcip_init_early_smp(void) -{ -#define IS_AVAIL1(var, str) ((var) ? str : "") - struct mcip_bcr { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int pad3:8, @@ -161,6 +129,14 @@ void mcip_init_early_smp(void) panic("kernel trying to use non-existent GRTC\n"); } +struct plat_smp_ops plat_smp_ops = { + .info = smp_cpuinfo_buf, + .init_early_smp = mcip_probe_n_setup, + .init_irq_cpu = mcip_setup_per_cpu, + .ipi_send = mcip_ipi_send, + .ipi_clear = mcip_ipi_clear, +}; + /*************************************************************************** * ARCv2 Interrupt Distribution Unit (IDU) * diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index cabde9dc0696..c33e77c0ad3e 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -160,10 +160,6 @@ static const struct cpuinfo_data arc_cpu_tbl[] = { { {0x00, NULL } } }; -#define IS_AVAIL1(v, s) ((v) ? s : "") -#define IS_USED_RUN(v) ((v) ? "" : "(not used) ") -#define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) -#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { @@ -415,8 +411,9 @@ void __init setup_arch(char **cmdline_p) if (machine_desc->init_early) machine_desc->init_early(); - setup_processor(); smp_init_cpus(); + + setup_processor(); setup_arch_memory(); /* copy flat DT out of .init and then unflatten it */ diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index be13d12420ba..580587805fa3 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -42,8 +42,13 @@ void __init smp_prepare_boot_cpu(void) } /* - * Initialise the CPU possible map early - this describes the CPUs - * which may be present or become present in the system. + * Called from setup_arch() before calling setup_processor() + * + * - Initialise the CPU possible map early - this describes the CPUs + * which may be present or become present in the system. + * - Call early smp init hook. This can initialize a specific multi-core + * IP which is say common to several platforms (hence not part of + * platform specific int_early() hook) */ void __init smp_init_cpus(void) { @@ -51,6 +56,9 @@ void __init smp_init_cpus(void) for (i = 0; i < NR_CPUS; i++) set_cpu_possible(i, true); + + if (plat_smp_ops.init_early_smp) + plat_smp_ops.init_early_smp(); } /* called from init ( ) => process 1 */ @@ -72,35 +80,29 @@ void __init smp_cpus_done(unsigned int max_cpus) } /* - * After power-up, a non Master CPU needs to wait for Master to kick start it - * - * The default implementation halts - * - * This relies on platform specific support allowing Master to directly set - * this CPU's PC (to be @first_lines_of_secondary() and kick start it. - * - * In lack of such h/w assist, platforms can override this function - * - make this function busy-spin on a token, eventually set by Master - * (from arc_platform_smp_wakeup_cpu()) - * - Once token is available, jump to @first_lines_of_secondary - * (using inline asm). - * - * Alert: can NOT use stack here as it has not been determined/setup for CPU. - * If it turns out to be elaborate, it's better to code it in assembly - * + * Default smp boot helper for Run-on-reset case where all cores start off + * together. Non-masters need to wait for Master to start running. + * This is implemented using a flag in memory, which Non-masters spin-wait on. + * Master sets it to cpu-id of core to "ungate" it. */ -void __weak arc_platform_smp_wait_to_boot(int cpu) +static volatile int wake_flag; + +static void arc_default_smp_cpu_kick(int cpu, unsigned long pc) { - /* - * As a hack for debugging - since debugger will single-step over the - * FLAG insn - wrap the halt itself it in a self loop - */ - __asm__ __volatile__( - "1: \n" - " flag 1 \n" - " b 1b \n"); + BUG_ON(cpu == 0); + wake_flag = cpu; } +void arc_platform_smp_wait_to_boot(int cpu) +{ + while (wake_flag != cpu) + ; + + wake_flag = 0; + __asm__ __volatile__("j @first_lines_of_secondary \n"); +} + + const char *arc_platform_smp_cpuinfo(void) { return plat_smp_ops.info ? : ""; @@ -129,8 +131,12 @@ void start_kernel_secondary(void) pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu); - if (machine_desc->init_smp) - machine_desc->init_smp(cpu); + /* Some SMP H/w setup - for each cpu */ + if (plat_smp_ops.init_irq_cpu) + plat_smp_ops.init_irq_cpu(cpu); + + if (machine_desc->init_cpu_smp) + machine_desc->init_cpu_smp(cpu); arc_local_timer_setup(); @@ -161,6 +167,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) if (plat_smp_ops.cpu_kick) plat_smp_ops.cpu_kick(cpu, (unsigned long)first_lines_of_secondary); + else + arc_default_smp_cpu_kick(cpu, (unsigned long)NULL); /* wait for 1 sec after kicking the secondary */ wait_till = jiffies + HZ; diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 4294761a2b3e..dfad287f1db1 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -285,7 +285,4 @@ void __init time_init(void) /* sets up the periodic event timer */ arc_local_timer_setup(); - - if (machine_desc->init_time) - machine_desc->init_time(); } diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index dd35bde39f69..894e696bddaa 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -12,7 +12,7 @@ #include OUTPUT_ARCH(arc) -ENTRY(_stext) +ENTRY(res_service) #ifdef CONFIG_CPU_BIG_ENDIAN jiffies = jiffies_64 + 4; diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile index 7beb941556c3..3703a4969349 100644 --- a/arch/arc/mm/Makefile +++ b/arch/arc/mm/Makefile @@ -8,3 +8,4 @@ obj-y := extable.o ioremap.o dma.o fault.o init.o obj-y += tlb.o tlbex.o cache.o mmap.o +obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 0d1a6e96839f..ff7ff6cbb811 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -25,7 +25,7 @@ static int l2_line_sz; int ioc_exists; volatile int slc_enable = 1, ioc_enable = 1; -void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr, +void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int cacheop); void (*__dma_cache_wback_inv)(unsigned long start, unsigned long sz); @@ -37,7 +37,6 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) int n = 0; struct cpuinfo_arc_cache *p; -#define IS_USED_RUN(v) ((v) ? "" : "(disabled) ") #define PR_CACHE(p, cfg, str) \ if (!(p)->ver) \ n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \ @@ -47,7 +46,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) (p)->sz_k, (p)->assoc, (p)->line_len, \ (p)->vipt ? "VIPT" : "PIPT", \ (p)->alias ? " aliasing" : "", \ - IS_ENABLED(cfg) ? "" : " (not used)"); + IS_USED_CFG(cfg)); PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); @@ -63,7 +62,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) if (ioc_exists) n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n", - IS_USED_RUN(ioc_enable)); + IS_DISABLED_RUN(ioc_enable)); return buf; } @@ -217,7 +216,7 @@ slc_chk: */ static inline -void __cache_line_loop_v2(unsigned long paddr, unsigned long vaddr, +void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op) { unsigned int aux_cmd; @@ -254,8 +253,12 @@ void __cache_line_loop_v2(unsigned long paddr, unsigned long vaddr, } } +/* + * For ARC700 MMUv3 I-cache and D-cache flushes + * Also reused for HS38 aliasing I-cache configuration + */ static inline -void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr, +void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op) { unsigned int aux_cmd, aux_tag; @@ -290,6 +293,16 @@ void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr, if (full_page) write_aux_reg(aux_tag, paddr); + /* + * This is technically for MMU v4, using the MMU v3 programming model + * Special work for HS38 aliasing I-cache configuratino with PAE40 + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + * Note that PTAG_HI is hoisted outside the line loop + */ + if (is_pae40_enabled() && op == OP_INV_IC) + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + while (num_lines-- > 0) { if (!full_page) { write_aux_reg(aux_tag, paddr); @@ -302,14 +315,20 @@ void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr, } /* - * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache - * maintenance ops (in IVIL reg), as long as icache doesn't alias. + * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT + * Here's how cache ops are implemented * - * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is - * specified in PTAG (similar to MMU v3) + * - D-cache: only paddr needed (in DC_IVDL/DC_FLDL) + * - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL) + * - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG + * respectively, similar to MMU v3 programming model, hence + * __cache_line_loop_v3() is used) + * + * If PAE40 is enabled, independent of aliasing considerations, the higher bits + * needs to be written into PTAG_HI */ static inline -void __cache_line_loop_v4(unsigned long paddr, unsigned long vaddr, +void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int cacheop) { unsigned int aux_cmd; @@ -336,6 +355,22 @@ void __cache_line_loop_v4(unsigned long paddr, unsigned long vaddr, num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + /* + * For HS38 PAE40 configuration + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + */ + if (is_pae40_enabled()) { + if (cacheop == OP_INV_IC) + /* + * Non aliasing I-cache in HS38, + * aliasing I-cache handled in __cache_line_loop_v3() + */ + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + else + write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32); + } + while (num_lines-- > 0) { write_aux_reg(aux_cmd, paddr); paddr += L1_CACHE_BYTES; @@ -413,7 +448,7 @@ static inline void __dc_entire_op(const int op) /* * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback) */ -static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, +static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op) { unsigned long flags; @@ -446,7 +481,7 @@ static inline void __ic_entire_inv(void) } static inline void -__ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, +__ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr, unsigned long sz) { unsigned long flags; @@ -463,7 +498,7 @@ __ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, #else struct ic_inv_args { - unsigned long paddr, vaddr; + phys_addr_t paddr, vaddr; int sz; }; @@ -474,7 +509,7 @@ static void __ic_line_inv_vaddr_helper(void *info) __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz); } -static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, +static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr, unsigned long sz) { struct ic_inv_args ic_inv = { @@ -495,7 +530,7 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, #endif /* CONFIG_ARC_HAS_ICACHE */ -noinline void slc_op(unsigned long paddr, unsigned long sz, const int op) +noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) { #ifdef CONFIG_ISA_ARCV2 /* @@ -585,7 +620,7 @@ void flush_dcache_page(struct page *page) } else if (page_mapped(page)) { /* kernel reading from page with U-mapping */ - unsigned long paddr = (unsigned long)page_address(page); + phys_addr_t paddr = (unsigned long)page_address(page); unsigned long vaddr = page->index << PAGE_CACHE_SHIFT; if (addr_not_cache_congruent(paddr, vaddr)) @@ -733,14 +768,14 @@ EXPORT_SYMBOL(flush_icache_range); * builtin kernel page will not have any virtual mappings. * kprobe on loadable module will be kernel vaddr. */ -void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len) +void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len) { __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV); __ic_line_inv_vaddr(paddr, vaddr, len); } /* wrapper to compile time eliminate alignment checks in flush loop */ -void __inv_icache_page(unsigned long paddr, unsigned long vaddr) +void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr) { __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE); } @@ -749,7 +784,7 @@ void __inv_icache_page(unsigned long paddr, unsigned long vaddr) * wrapper to clearout kernel or userspace mappings of a page * For kernel mappings @vaddr == @paddr */ -void __flush_dcache_page(unsigned long paddr, unsigned long vaddr) +void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr) { __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); } @@ -807,8 +842,8 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma) { - unsigned long kfrom = (unsigned long)page_address(from); - unsigned long kto = (unsigned long)page_address(to); + void *kfrom = kmap_atomic(from); + void *kto = kmap_atomic(to); int clean_src_k_mappings = 0; /* @@ -818,13 +853,16 @@ void copy_user_highpage(struct page *to, struct page *from, * * Note that while @u_vaddr refers to DST page's userspace vaddr, it is * equally valid for SRC page as well + * + * For !VIPT cache, all of this gets compiled out as + * addr_not_cache_congruent() is 0 */ if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { - __flush_dcache_page(kfrom, u_vaddr); + __flush_dcache_page((unsigned long)kfrom, u_vaddr); clean_src_k_mappings = 1; } - copy_page((void *)kto, (void *)kfrom); + copy_page(kto, kfrom); /* * Mark DST page K-mapping as dirty for a later finalization by @@ -841,11 +879,14 @@ void copy_user_highpage(struct page *to, struct page *from, * sync the kernel mapping back to physical page */ if (clean_src_k_mappings) { - __flush_dcache_page(kfrom, kfrom); + __flush_dcache_page((unsigned long)kfrom, (unsigned long)kfrom); set_bit(PG_dc_clean, &from->flags); } else { clear_bit(PG_dc_clean, &from->flags); } + + kunmap_atomic(kto); + kunmap_atomic(kfrom); } void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index d948e4e9d89c..af63f4a13e60 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -18,7 +18,14 @@ #include #include -static int handle_vmalloc_fault(unsigned long address) +/* + * kernel virtual address is required to implement vmalloc/pkmap/fixmap + * Refer to asm/processor.h for System Memory Map + * + * It simply copies the PMD entry (pointer to 2nd level page table or hugepage) + * from swapper pgdir to task pgdir. The 2nd level table/page is thus shared + */ +noinline static int handle_kernel_vaddr_fault(unsigned long address) { /* * Synchronize this task's top level page-table @@ -72,8 +79,8 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) * only copy the information from the master page table, * nothing more. */ - if (address >= VMALLOC_START && address <= VMALLOC_END) { - ret = handle_vmalloc_fault(address); + if (address >= VMALLOC_START) { + ret = handle_kernel_vaddr_fault(address); if (unlikely(ret)) goto bad_area_nosemaphore; else diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c new file mode 100644 index 000000000000..065ee6bfa82a --- /dev/null +++ b/arch/arc/mm/highmem.c @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * HIGHMEM API: + * + * kmap() API provides sleep semantics hence refered to as "permanent maps" + * It allows mapping LAST_PKMAP pages, using @last_pkmap_nr as the cursor + * for book-keeping + * + * kmap_atomic() can't sleep (calls pagefault_disable()), thus it provides + * shortlived ala "temporary mappings" which historically were implemented as + * fixmaps (compile time addr etc). Their book-keeping is done per cpu. + * + * Both these facts combined (preemption disabled and per-cpu allocation) + * means the total number of concurrent fixmaps will be limited to max + * such allocations in a single control path. Thus KM_TYPE_NR (another + * historic relic) is a small'ish number which caps max percpu fixmaps + * + * ARC HIGHMEM Details + * + * - the kernel vaddr space from 0x7z to 0x8z (currently used by vmalloc/module) + * is now shared between vmalloc and kmap (non overlapping though) + * + * - Both fixmap/pkmap use a dedicated page table each, hooked up to swapper PGD + * This means each only has 1 PGDIR_SIZE worth of kvaddr mappings, which means + * 2M of kvaddr space for typical config (8K page and 11:8:13 traversal split) + * + * - fixmap anyhow needs a limited number of mappings. So 2M kvaddr == 256 PTE + * slots across NR_CPUS would be more than sufficient (generic code defines + * KM_TYPE_NR as 20). + * + * - pkmap being preemptible, in theory could do with more than 256 concurrent + * mappings. However, generic pkmap code: map_new_virtual(), doesn't traverse + * the PGD and only works with a single page table @pkmap_page_table, hence + * sets the limit + */ + +extern pte_t * pkmap_page_table; +static pte_t * fixmap_page_table; + +void *kmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return page_address(page); + + return kmap_high(page); +} + +void *kmap_atomic(struct page *page) +{ + int idx, cpu_idx; + unsigned long vaddr; + + preempt_disable(); + pagefault_disable(); + if (!PageHighMem(page)) + return page_address(page); + + cpu_idx = kmap_atomic_idx_push(); + idx = cpu_idx + KM_TYPE_NR * smp_processor_id(); + vaddr = FIXMAP_ADDR(idx); + + set_pte_at(&init_mm, vaddr, fixmap_page_table + idx, + mk_pte(page, kmap_prot)); + + return (void *)vaddr; +} +EXPORT_SYMBOL(kmap_atomic); + +void __kunmap_atomic(void *kv) +{ + unsigned long kvaddr = (unsigned long)kv; + + if (kvaddr >= FIXMAP_BASE && kvaddr < (FIXMAP_BASE + FIXMAP_SIZE)) { + + /* + * Because preemption is disabled, this vaddr can be associated + * with the current allocated index. + * But in case of multiple live kmap_atomic(), it still relies on + * callers to unmap in right order. + */ + int cpu_idx = kmap_atomic_idx(); + int idx = cpu_idx + KM_TYPE_NR * smp_processor_id(); + + WARN_ON(kvaddr != FIXMAP_ADDR(idx)); + + pte_clear(&init_mm, kvaddr, fixmap_page_table + idx); + local_flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE); + + kmap_atomic_idx_pop(); + } + + pagefault_enable(); + preempt_enable(); +} +EXPORT_SYMBOL(__kunmap_atomic); + +noinline pte_t *alloc_kmap_pgtable(unsigned long kvaddr) +{ + pgd_t *pgd_k; + pud_t *pud_k; + pmd_t *pmd_k; + pte_t *pte_k; + + pgd_k = pgd_offset_k(kvaddr); + pud_k = pud_offset(pgd_k, kvaddr); + pmd_k = pmd_offset(pud_k, kvaddr); + + pte_k = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd_k, pte_k); + return pte_k; +} + +void kmap_init(void) +{ + /* Due to recursive include hell, we can't do this in processor.h */ + BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE)); + + BUILD_BUG_ON(KM_TYPE_NR > PTRS_PER_PTE); + pkmap_page_table = alloc_kmap_pgtable(PKMAP_BASE); + + BUILD_BUG_ON(LAST_PKMAP > PTRS_PER_PTE); + fixmap_page_table = alloc_kmap_pgtable(FIXMAP_BASE); +} diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index d44eedd8c322..a9305b5a2cd4 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -15,6 +15,7 @@ #endif #include #include +#include #include #include #include @@ -24,16 +25,22 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE); char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE); EXPORT_SYMBOL(empty_zero_page); -/* Default tot mem from .config */ -static unsigned long arc_mem_sz = 0x20000000; /* some default */ +static const unsigned long low_mem_start = CONFIG_LINUX_LINK_BASE; +static unsigned long low_mem_sz; + +#ifdef CONFIG_HIGHMEM +static unsigned long min_high_pfn; +static u64 high_mem_start; +static u64 high_mem_sz; +#endif /* User can over-ride above with "mem=nnn[KkMm]" in cmdline */ static int __init setup_mem_sz(char *str) { - arc_mem_sz = memparse(str, NULL) & PAGE_MASK; + low_mem_sz = memparse(str, NULL) & PAGE_MASK; /* early console might not be setup yet - it will show up later */ - pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(arc_mem_sz)); + pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(low_mem_sz)); return 0; } @@ -41,8 +48,22 @@ early_param("mem", setup_mem_sz); void __init early_init_dt_add_memory_arch(u64 base, u64 size) { - arc_mem_sz = size & PAGE_MASK; - pr_info("Memory size set via devicetree %ldM\n", TO_MB(arc_mem_sz)); + int in_use = 0; + + if (!low_mem_sz) { + BUG_ON(base != low_mem_start); + low_mem_sz = size; + in_use = 1; + } else { +#ifdef CONFIG_HIGHMEM + high_mem_start = base; + high_mem_sz = size; + in_use = 1; +#endif + } + + pr_info("Memory @ %llx [%lldM] %s\n", + base, TO_MB(size), !in_use ? "Not used":""); } #ifdef CONFIG_BLK_DEV_INITRD @@ -72,46 +93,62 @@ early_param("initrd", early_initrd); void __init setup_arch_memory(void) { unsigned long zones_size[MAX_NR_ZONES]; - unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz; + unsigned long zones_holes[MAX_NR_ZONES]; init_mm.start_code = (unsigned long)_text; init_mm.end_code = (unsigned long)_etext; init_mm.end_data = (unsigned long)_edata; init_mm.brk = (unsigned long)_end; - /* - * We do it here, so that memory is correctly instantiated - * even if "mem=xxx" cmline over-ride is given and/or - * DT has memory node. Each causes an update to @arc_mem_sz - * and we finally add memory one here - */ - memblock_add(CONFIG_LINUX_LINK_BASE, arc_mem_sz); - - /*------------- externs in mm need setting up ---------------*/ - /* first page of system - kernel .vector starts here */ min_low_pfn = ARCH_PFN_OFFSET; - /* Last usable page of low mem (no HIGHMEM yet for ARC port) */ - max_low_pfn = max_pfn = PFN_DOWN(end_mem); + /* Last usable page of low mem */ + max_low_pfn = max_pfn = PFN_DOWN(low_mem_start + low_mem_sz); - max_mapnr = max_low_pfn - min_low_pfn; +#ifdef CONFIG_HIGHMEM + min_high_pfn = PFN_DOWN(high_mem_start); + max_pfn = PFN_DOWN(high_mem_start + high_mem_sz); +#endif - /*------------- reserve kernel image -----------------------*/ - memblock_reserve(CONFIG_LINUX_LINK_BASE, - __pa(_end) - CONFIG_LINUX_LINK_BASE); + max_mapnr = max_pfn - min_low_pfn; + + /*------------- bootmem allocator setup -----------------------*/ + + /* + * seed the bootmem allocator after any DT memory node parsing or + * "mem=xxx" cmdline overrides have potentially updated @arc_mem_sz + * + * Only low mem is added, otherwise we have crashes when allocating + * mem_map[] itself. NO_BOOTMEM allocates mem_map[] at the end of + * avail memory, ending in highmem with a > 32-bit address. However + * it then tries to memset it with a truncaed 32-bit handle, causing + * the crash + */ + + memblock_add(low_mem_start, low_mem_sz); + memblock_reserve(low_mem_start, __pa(_end) - low_mem_start); #ifdef CONFIG_BLK_DEV_INITRD - /*------------- reserve initrd image -----------------------*/ if (initrd_start) memblock_reserve(__pa(initrd_start), initrd_end - initrd_start); #endif memblock_dump_all(); - /*-------------- node setup --------------------------------*/ + /*----------------- node/zones setup --------------------------*/ memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_NORMAL] = max_mapnr; + memset(zones_holes, 0, sizeof(zones_holes)); + + zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; + zones_holes[ZONE_NORMAL] = 0; + +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_HIGHMEM] = max_pfn - max_low_pfn; + + /* This handles the peripheral address space hole */ + zones_holes[ZONE_HIGHMEM] = min_high_pfn - max_low_pfn; +#endif /* * We can't use the helper free_area_init(zones[]) because it uses @@ -122,9 +159,12 @@ void __init setup_arch_memory(void) free_area_init_node(0, /* node-id */ zones_size, /* num pages per zone */ min_low_pfn, /* first pfn of node */ - NULL); /* NO holes */ + zones_holes); /* holes */ - high_memory = (void *)end_mem; +#ifdef CONFIG_HIGHMEM + high_memory = (void *)(min_high_pfn << PAGE_SHIFT); + kmap_init(); +#endif } /* @@ -135,6 +175,14 @@ void __init setup_arch_memory(void) */ void __init mem_init(void) { +#ifdef CONFIG_HIGHMEM + unsigned long tmp; + + reset_all_zones_managed_pages(); + for (tmp = min_high_pfn; tmp < max_pfn; tmp++) + free_highmem_page(pfn_to_page(tmp)); +#endif + free_all_bootmem(); mem_init_print_info(NULL); } diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 2c7ce8bb7475..0ee739846847 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -109,6 +109,10 @@ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; static inline void __tlb_entry_erase(void) { write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } @@ -182,7 +186,7 @@ static void utlb_invalidate(void) } -static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) +static void tlb_entry_insert(unsigned int pd0, pte_t pd1) { unsigned int idx; @@ -225,10 +229,14 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid) write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry); } -static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) +static void tlb_entry_insert(unsigned int pd0, pte_t pd1) { write_aux_reg(ARC_REG_TLBPD0, pd0); write_aux_reg(ARC_REG_TLBPD1, pd1); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry); } @@ -240,22 +248,39 @@ static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) noinline void local_flush_tlb_all(void) { + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; unsigned long flags; unsigned int entry; - struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + int num_tlb = mmu->sets * mmu->ways; local_irq_save(flags); /* Load PD0 and PD1 with template for a Blank Entry */ write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); - for (entry = 0; entry < mmu->num_tlb; entry++) { + for (entry = 0; entry < num_tlb; entry++) { /* write this entry to the TLB */ write_aux_reg(ARC_REG_TLBINDEX, entry); write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + const int stlb_idx = 0x800; + + /* Blank sTLB entry */ + write_aux_reg(ARC_REG_TLBPD0, _PAGE_HW_SZ); + + for (entry = stlb_idx; entry < stlb_idx + 16; entry++) { + write_aux_reg(ARC_REG_TLBINDEX, entry); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); + } + } + utlb_invalidate(); local_irq_restore(flags); @@ -409,6 +434,15 @@ static inline void ipi_flush_tlb_range(void *arg) local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline void ipi_flush_pmd_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_pmd_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} +#endif + static inline void ipi_flush_tlb_kernel_range(void *arg) { struct tlb_args *ta = (struct tlb_args *)arg; @@ -449,6 +483,20 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_pmd_tlb_range, &ta, 1); +} +#endif + void flush_tlb_kernel_range(unsigned long start, unsigned long end) { struct tlb_args ta = { @@ -463,11 +511,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) /* * Routine to create a TLB entry */ -void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) { unsigned long flags; unsigned int asid_or_sasid, rwx; - unsigned long pd0, pd1; + unsigned long pd0; + pte_t pd1; /* * create_tlb() assumes that current->mm == vma->mm, since @@ -499,9 +548,9 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) local_irq_save(flags); - tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address); + tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr); - address &= PAGE_MASK; + vaddr &= PAGE_MASK; /* update this PTE credentials */ pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED); @@ -511,7 +560,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) /* ASID for this task */ asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff; - pd0 = address | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); + pd0 = vaddr | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); /* * ARC MMU provides fully orthogonal access bits for K/U mode, @@ -547,7 +596,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, pte_t *ptep) { unsigned long vaddr = vaddr_unaligned & PAGE_MASK; - unsigned long paddr = pte_val(*ptep) & PAGE_MASK; + phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK; struct page *page = pfn_to_page(pte_pfn(*ptep)); create_tlb(vma, vaddr, ptep); @@ -580,6 +629,95 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, } } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +/* + * MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP + * support. + * + * Normal and Super pages can co-exist (ofcourse not overlap) in TLB with a + * new bit "SZ" in TLB page desciptor to distinguish between them. + * Super Page size is configurable in hardware (4K to 16M), but fixed once + * RTL builds. + * + * The exact THP size a Linx configuration will support is a function of: + * - MMU page size (typical 8K, RTL fixed) + * - software page walker address split between PGD:PTE:PFN (typical + * 11:8:13, but can be changed with 1 line) + * So for above default, THP size supported is 8K * (2^8) = 2M + * + * Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime + * reduces to 1 level (as PTE is folded into PGD and canonically referred + * to as PMD). + * Thus THP PMD accessors are implemented in terms of PTE (just like sparc) + */ + +void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd) +{ + pte_t pte = __pte(pmd_val(*pmd)); + update_mmu_cache(vma, addr, &pte); +} + +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) +{ + struct list_head *lh = (struct list_head *) pgtable; + + assert_spin_locked(&mm->page_table_lock); + + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; +} + +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) +{ + struct list_head *lh; + pgtable_t pgtable; + + assert_spin_locked(&mm->page_table_lock); + + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); + } + + pte_val(pgtable[0]) = 0; + pte_val(pgtable[1]) = 0; + + return pgtable; +} + +void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + unsigned int cpu; + unsigned long flags; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + if (likely(asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)) { + unsigned int asid = hw_pid(vma->vm_mm, cpu); + + /* No need to loop here: this will always be for 1 Huge Page */ + tlb_entry_erase(start | _PAGE_HW_SZ | asid); + } + + local_irq_restore(flags); +} + +#endif + /* Read the Cache Build Confuration Registers, Decode them and save into * the cpuinfo structure for later use. * No Validation is done here, simply read/convert the BCRs @@ -598,10 +736,10 @@ void read_decode_mmu_bcr(void) struct bcr_mmu_3 { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4, + unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4, u_itlb:4, u_dtlb:4; #else - unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4, + unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4, ways:4, ver:8; #endif } *mmu3; @@ -622,7 +760,7 @@ void read_decode_mmu_bcr(void) if (mmu->ver <= 2) { mmu2 = (struct bcr_mmu_1_2 *)&tmp; - mmu->pg_sz_k = TO_KB(PAGE_SIZE); + mmu->pg_sz_k = TO_KB(0x2000); mmu->sets = 1 << mmu2->sets; mmu->ways = 1 << mmu2->ways; mmu->u_dtlb = mmu2->u_dtlb; @@ -634,6 +772,7 @@ void read_decode_mmu_bcr(void) mmu->ways = 1 << mmu3->ways; mmu->u_dtlb = mmu3->u_dtlb; mmu->u_itlb = mmu3->u_itlb; + mmu->sasid = mmu3->sasid; } else { mmu4 = (struct bcr_mmu_4 *)&tmp; mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); @@ -642,9 +781,9 @@ void read_decode_mmu_bcr(void) mmu->ways = mmu4->n_ways * 2; mmu->u_dtlb = mmu4->u_dtlb * 4; mmu->u_itlb = mmu4->u_itlb * 4; + mmu->sasid = mmu4->sasid; + mmu->pae = mmu4->pae; } - - mmu->num_tlb = mmu->sets * mmu->ways; } char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) @@ -655,14 +794,15 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) if (p_mmu->s_pg_sz_m) scnprintf(super_pg, 64, "%dM Super Page%s, ", - p_mmu->s_pg_sz_m, " (not used)"); + p_mmu->s_pg_sz_m, + IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", + "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n", p_mmu->ver, p_mmu->pg_sz_k, super_pg, - p_mmu->num_tlb, p_mmu->sets, p_mmu->ways, + p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, - IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : ""); + IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40)); return buf; } @@ -690,6 +830,14 @@ void arc_mmu_init(void) if (mmu->pg_sz_k != TO_KB(PAGE_SIZE)) panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + mmu->s_pg_sz_m != TO_MB(HPAGE_PMD_SIZE)) + panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n", + (unsigned long)TO_MB(HPAGE_PMD_SIZE)); + + if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae) + panic("Hardware doesn't support PAE40\n"); + /* Enable the MMU */ write_aux_reg(ARC_REG_PID, MMU_ENABLE); @@ -725,15 +873,15 @@ void arc_mmu_init(void) * the duplicate one. * -Knob to be verbose abt it.(TODO: hook them up to debugfs) */ -volatile int dup_pd_verbose = 1;/* Be slient abt it or complain (default) */ +volatile int dup_pd_silent; /* Be slient abt it or complain (default) */ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, struct pt_regs *regs) { - int set, way, n; - unsigned long flags, is_valid; struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; - unsigned int pd0[mmu->ways], pd1[mmu->ways]; + unsigned int pd0[mmu->ways]; + unsigned long flags; + int set; local_irq_save(flags); @@ -743,14 +891,16 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, /* loop thru all sets of TLB */ for (set = 0; set < mmu->sets; set++) { + int is_valid, way; + /* read out all the ways of current set */ for (way = 0, is_valid = 0; way < mmu->ways; way++) { write_aux_reg(ARC_REG_TLBINDEX, SET_WAY_TO_IDX(mmu, set, way)); write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead); pd0[way] = read_aux_reg(ARC_REG_TLBPD0); - pd1[way] = read_aux_reg(ARC_REG_TLBPD1); is_valid |= pd0[way] & _PAGE_PRESENT; + pd0[way] &= PAGE_MASK; } /* If all the WAYS in SET are empty, skip to next SET */ @@ -759,30 +909,28 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, /* Scan the set for duplicate ways: needs a nested loop */ for (way = 0; way < mmu->ways - 1; way++) { + + int n; + if (!pd0[way]) continue; for (n = way + 1; n < mmu->ways; n++) { - if ((pd0[way] & PAGE_MASK) == - (pd0[n] & PAGE_MASK)) { + if (pd0[way] != pd0[n]) + continue; - if (dup_pd_verbose) { - pr_info("Duplicate PD's @" - "[%d:%d]/[%d:%d]\n", - set, way, set, n); - pr_info("TLBPD0[%u]: %08x\n", - way, pd0[way]); - } + if (!dup_pd_silent) + pr_info("Dup TLB PD0 %08x @ set %d ways %d,%d\n", + pd0[way], set, way, n); - /* - * clear entry @way and not @n. This is - * critical to our optimised loop - */ - pd0[way] = pd1[way] = 0; - write_aux_reg(ARC_REG_TLBINDEX, + /* + * clear entry @way and not @n. + * This is critical to our optimised loop + */ + pd0[way] = 0; + write_aux_reg(ARC_REG_TLBINDEX, SET_WAY_TO_IDX(mmu, set, way)); - __tlb_entry_erase(); - } + __tlb_entry_erase(); } } } diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index f6f4c3cb505d..63860adc4814 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -205,20 +205,38 @@ ex_saved_reg1: #endif lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD - ld.as r1, [r1, r0] ; PGD entry corresp to faulting addr - and.f r1, r1, PAGE_MASK ; Ignoring protection and other flags - ; contains Ptr to Page Table - bz.d do_slow_path_pf ; if no Page Table, do page fault + ld.as r3, [r1, r0] ; PGD entry corresp to faulting addr + tst r3, r3 + bz do_slow_path_pf ; if no Page Table, do page fault + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp) + add2.nz r1, r1, r0 + bnz.d 2f ; YES: PGD == PMD has THP PTE: stop pgd walk + mov.nz r0, r3 + +#endif + and r1, r3, PAGE_MASK ; Get the PTE entry: The idea is ; (1) x = addr >> PAGE_SHIFT -> masks page-off bits from @fault-addr ; (2) y = x & (PTRS_PER_PTE - 1) -> to get index - ; (3) z = pgtbl[y] - ; To avoid the multiply by in end, we do the -2, <<2 below + ; (3) z = (pgtbl + y * 4) + +#ifdef CONFIG_ARC_HAS_PAE40 +#define PTE_SIZE_LOG 3 /* 8 == 2 ^ 3 */ +#else +#define PTE_SIZE_LOG 2 /* 4 == 2 ^ 2 */ +#endif + + ; multiply in step (3) above avoided by shifting lesser in step (1) + lsr r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG ) + and r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG ) + ld.aw r0, [r1, r0] ; r0: PTE (lower word only for PAE40) + ; r1: PTE ptr + +2: - lsr r0, r2, (PAGE_SHIFT - 2) - and r0, r0, ( (PTRS_PER_PTE - 1) << 2) - ld.aw r0, [r1, r0] ; get PTE and PTE ptr for fault addr #ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT and.f 0, r0, _PAGE_PRESENT bz 1f @@ -233,18 +251,23 @@ ex_saved_reg1: ;----------------------------------------------------------------- ; Convert Linux PTE entry into TLB entry ; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu +; (for PAE40, two-words PTE, while three-word TLB Entry [PD0:PD1:PD1HI]) ; IN: r0 = PTE, r1 = ptr to PTE .macro CONV_PTE_TO_TLB - and r3, r0, PTE_BITS_RWX ; r w x - lsl r2, r3, 3 ; r w x 0 0 0 (GLOBAL, kernel only) + and r3, r0, PTE_BITS_RWX ; r w x + lsl r2, r3, 3 ; Kr Kw Kx 0 0 0 (GLOBAL, kernel only) and.f 0, r0, _PAGE_GLOBAL - or.z r2, r2, r3 ; r w x r w x (!GLOBAL, user page) + or.z r2, r2, r3 ; Kr Kw Kx Ur Uw Ux (!GLOBAL, user page) and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE or r3, r3, r2 - sr r3, [ARC_REG_TLBPD1] ; these go in PD1 + sr r3, [ARC_REG_TLBPD1] ; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C +#ifdef CONFIG_ARC_HAS_PAE40 + ld r3, [r1, 4] ; paddr[39..32] + sr r3, [ARC_REG_TLBPD1HI] +#endif and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb @@ -365,7 +388,7 @@ ENTRY(EV_TLBMissD) lr r3, [ecr] or r0, r0, _PAGE_ACCESSED ; Accessed bit always btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ? - or.nz r0, r0, _PAGE_MODIFIED ; if Write, set Dirty bit as well + or.nz r0, r0, _PAGE_DIRTY ; if Write, set Dirty bit as well st_s r0, [r1] ; Write back PTE CONV_PTE_TO_TLB diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index 0a77b19e1df8..1b0f0f458a2b 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -455,11 +455,6 @@ static void __init axs103_early_init(void) axs10x_print_board_ver(AXC003_CREG + 4088, "AXC003 CPU Card"); axs10x_early_init(); - -#ifdef CONFIG_ARC_MCIP - /* No Hardware init, but filling the smp ops callbacks */ - mcip_init_early_smp(); -#endif } #endif @@ -487,9 +482,6 @@ static const char *axs103_compat[] __initconst = { MACHINE_START(AXS103, "axs103") .dt_compat = axs103_compat, .init_early = axs103_early_init, -#ifdef CONFIG_ARC_MCIP - .init_smp = mcip_init_smp, -#endif MACHINE_END /* diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c index d9e35b4a2f08..dde692812bc1 100644 --- a/arch/arc/plat-sim/platform.c +++ b/arch/arc/plat-sim/platform.c @@ -30,8 +30,4 @@ static const char *simulation_compat[] __initconst = { MACHINE_START(SIMULATION, "simulation") .dt_compat = simulation_compat, -#ifdef CONFIG_ARC_MCIP - .init_early = mcip_init_early_smp, - .init_smp = mcip_init_smp, -#endif MACHINE_END diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 29c57b2cb344..3eabbbbfd578 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -30,9 +30,19 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +#ifdef CONFIG_TRANSPARENT_HUGEPAGE extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty); +#else +static inline int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty) +{ + BUILD_BUG(); + return 0; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG @@ -64,12 +74,12 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd)); return r; } -#else /* CONFIG_TRANSPARENT_HUGEPAGE */ +#else static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - BUG(); + BUILD_BUG(); return 0; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -81,8 +91,21 @@ int ptep_clear_flush_young(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH -int pmdp_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); +#else +/* + * Despite relevant to THP only, this API is called from generic rmap code + * under PageTransHuge(), hence needs a dummy implementation for !THP + */ +static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + BUILD_BUG(); + return 0; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR @@ -175,11 +198,11 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, pmd_t old_pmd = *pmdp; set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd)); } -#else /* CONFIG_TRANSPARENT_HUGEPAGE */ +#else static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - BUG(); + BUILD_BUG(); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif @@ -248,7 +271,7 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) #else /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) { - BUG(); + BUILD_BUG(); return 0; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index bbac913f96bc..3fd0311c3ba7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1880,7 +1880,7 @@ static int __split_huge_page_map(struct page *page, * here). But it is generally safer to never allow * small and huge TLB entries for the same virtual * address to be loaded simultaneously. So instead of - * doing "pmd_populate(); flush_tlb_range();" we first + * doing "pmd_populate(); flush_pmd_tlb_range();" we first * mark the current pmd notpresent (atomically because * here the pmd_trans_huge and pmd_trans_splitting * must remain set at all times on the pmd until the diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 6b674e00153c..7d3db0247983 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -57,26 +57,6 @@ int ptep_set_access_flags(struct vm_area_struct *vma, } #endif -#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS -int pmdp_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp, - pmd_t entry, int dirty) -{ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - int changed = !pmd_same(*pmdp, entry); - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - if (changed) { - set_pmd_at(vma->vm_mm, address, pmdp, entry); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); - } - return changed; -#else /* CONFIG_TRANSPARENT_HUGEPAGE */ - BUG(); - return 0; -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -} -#endif - #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) @@ -89,23 +69,6 @@ int ptep_clear_flush_young(struct vm_area_struct *vma, } #endif -#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH -int pmdp_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - int young; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON(address & ~HPAGE_PMD_MASK); -#else - BUG(); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - young = pmdp_test_and_clear_young(vma, address, pmdp); - if (young) - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); - return young; -} -#endif - #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) @@ -119,8 +82,51 @@ pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, } #endif -#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH #ifdef CONFIG_TRANSPARENT_HUGEPAGE + +#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE + +/* + * ARCHes with special requirements for evicting THP backing TLB entries can + * implement this. Otherwise also, it can help optimize normal TLB flush in + * THP regime. stock flush_tlb_range() typically has optimization to nuke the + * entire TLB TLB if flush span is greater than a threshhold, which will + * likely be true for a single huge page. Thus a single thp flush will + * invalidate the entire TLB which is not desitable. + * e.g. see arch/arc: flush_pmd_tlb_range + */ +#define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) +#endif + +#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty) +{ + int changed = !pmd_same(*pmdp, entry); + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + if (changed) { + set_pmd_at(vma->vm_mm, address, pmdp, entry); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + } + return changed; +} +#endif + +#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + int young; + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + young = pmdp_test_and_clear_young(vma, address, pmdp); + if (young) + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + return young; +} +#endif + +#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -128,14 +134,12 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(!pmd_trans_huge(*pmdp)); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH -#ifdef CONFIG_TRANSPARENT_HUGEPAGE void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -143,13 +147,11 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, VM_BUG_ON(address & ~HPAGE_PMD_MASK); set_pmd_at(vma->vm_mm, address, pmdp, pmd); /* tlb flush only to serialize against gup-fast */ - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT -#ifdef CONFIG_TRANSPARENT_HUGEPAGE void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) { @@ -162,11 +164,9 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, list_add(&pgtable->lru, &pmd_huge_pte(mm, pmdp)->lru); pmd_huge_pte(mm, pmdp) = pgtable; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW -#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* no "address" argument so destroys page coloring of some arch */ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { @@ -185,23 +185,19 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) } return pgtable; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PMDP_INVALIDATE -#ifdef CONFIG_TRANSPARENT_HUGEPAGE void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { pmd_t entry = *pmdp; set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry)); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef pmdp_collapse_flush -#ifdef CONFIG_TRANSPARENT_HUGEPAGE pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -214,8 +210,8 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(pmd_trans_huge(*pmdp)); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */