diff --git a/Documentation/riscv/boot-image-header.txt b/Documentation/riscv/boot-image-header.txt new file mode 100644 index 000000000000..1b73fea23b39 --- /dev/null +++ b/Documentation/riscv/boot-image-header.txt @@ -0,0 +1,50 @@ + Boot image header in RISC-V Linux + ============================================= + +Author: Atish Patra +Date : 20 May 2019 + +This document only describes the boot image header details for RISC-V Linux. +The complete booting guide will be available at Documentation/riscv/booting.txt. + +The following 64-byte header is present in the decompressed Linux kernel image. + + u32 code0; /* Executable code */ + u32 code1; /* Executable code */ + u64 text_offset; /* Image load offset, little endian */ + u64 image_size; /* Effective Image size, little endian */ + u64 flags; /* kernel flags, little endian */ + u32 version; /* Version of this header */ + u32 res1 = 0; /* Reserved */ + u64 res2 = 0; /* Reserved */ + u64 magic = 0x5643534952; /* Magic number, little endian, "RISCV" */ + u32 res3; /* Reserved for additional RISC-V specific header */ + u32 res4; /* Reserved for PE COFF offset */ + +This header format is compliant with PE/COFF header and largely inspired by +ARM64 header. Thus, both ARM64 & RISC-V header can be combined into one common +header in future. + +Notes: +- This header can also be reused to support EFI stub for RISC-V in future. EFI + specification needs PE/COFF image header in the beginning of the kernel image + in order to load it as an EFI application. In order to support EFI stub, + code0 should be replaced with "MZ" magic string and res4 (at offset 0x3c) should + point to the rest of the PE/COFF header. + +- version field indicates the header version number. + Bits 0:15 - Minor version + Bits 16:31 - Major version + + This preserves compatibility across newer and older versions of the header. + The current version is defined as 0.1. + +- res3 is reserved for offset to any other additional fields. 
This makes the + header extendible in future. One example would be to accommodate ISA + extension for RISC-V in future. For current version, it is set to be zero. + +- In current header, the flag field has only one field. + Bit 0: Kernel endianness. 1 if BE, 0 if LE. + +- Image size is mandatory for boot loader to load kernel image. Booting will + fail otherwise. diff --git a/MAINTAINERS b/MAINTAINERS index e50faa93e170..500cdb68ccbc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13720,7 +13720,7 @@ RISC-V ARCHITECTURE M: Palmer Dabbelt M: Albert Ou L: linux-riscv@lists.infradead.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/palmer/riscv-linux.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git S: Supported F: arch/riscv/ K: riscv @@ -14582,7 +14582,7 @@ M: Paul Walmsley L: linux-riscv@lists.infradead.org T: git git://github.com/sifive/riscv-linux.git S: Supported -K: sifive +K: [^@]sifive N: sifive SIFIVE FU540 SYSTEM-ON-CHIP diff --git a/arch/Kconfig b/arch/Kconfig index 6dd1faab6ccb..ac0fba400ded 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -569,6 +569,9 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD config HAVE_ARCH_HUGE_VMAP bool +config ARCH_WANT_HUGE_PMD_SHARE + bool + config HAVE_ARCH_SOFT_DIRTY bool diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e1ea69994e0f..3adcec05b1f6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -73,6 +73,7 @@ config ARM64 select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_FRAME_POINTERS + select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER @@ -906,7 +907,6 @@ config SYS_SUPPORTS_HUGETLBFS def_bool y config ARCH_WANT_HUGE_PMD_SHARE - def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) config ARCH_HAS_CACHE_LINE_SIZE def_bool y diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 
13a1c0d04e9e..59a4727ecd6c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -52,6 +52,8 @@ config RISCV select ARCH_HAS_MMIOWB select HAVE_EBPF_JIT if 64BIT select EDAC_SUPPORT + select ARCH_HAS_GIGANTIC_PAGE + select ARCH_WANT_HUGE_PMD_SHARE if 64BIT config MMU def_bool y @@ -66,6 +68,12 @@ config PAGE_OFFSET default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB +config ARCH_WANT_GENERAL_HUGETLB + def_bool y + +config SYS_SUPPORTS_HUGETLBFS + def_bool y + config STACKTRACE_SUPPORT def_bool y @@ -97,6 +105,8 @@ config PGTABLE_LEVELS default 3 if 64BIT default 2 +source "arch/riscv/Kconfig.socs" + menu "Platform type" choice diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs new file mode 100644 index 000000000000..536c0ef4aee8 --- /dev/null +++ b/arch/riscv/Kconfig.socs @@ -0,0 +1,13 @@ +menu "SoC selection" + +config SOC_SIFIVE + bool "SiFive SoCs" + select SERIAL_SIFIVE + select SERIAL_SIFIVE_CONSOLE + select CLK_SIFIVE + select CLK_SIFIVE_FU540_PRCI + select SIFIVE_PLIC + help + This enables support for SiFive SoC platform hardware. 
+ +endmenu diff --git a/arch/riscv/boot/dts/sifive/Makefile b/arch/riscv/boot/dts/sifive/Makefile index baaeef9efdcb..6d6189e6e4af 100644 --- a/arch/riscv/boot/dts/sifive/Makefile +++ b/arch/riscv/boot/dts/sifive/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0 -dtb-y += hifive-unleashed-a00.dtb +dtb-$(CONFIG_SOC_SIFIVE) += hifive-unleashed-a00.dtb diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 04944fb4fa7a..b7b749b18853 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -1,5 +1,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y @@ -12,6 +14,7 @@ CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y +CONFIG_SOC_SIFIVE=y CONFIG_SMP=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -49,8 +52,6 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y -CONFIG_SERIAL_SIFIVE=y -CONFIG_SERIAL_SIFIVE_CONSOLE=y CONFIG_HVC_RISCV_SBI=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_DRM=y @@ -66,9 +67,6 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y CONFIG_VIRTIO_MMIO=y -CONFIG_CLK_SIFIVE=y -CONFIG_CLK_SIFIVE_FU540_PRCI=y -CONFIG_SIFIVE_PLIC=y CONFIG_SPI_SIFIVE=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 1a911ed8e772..d5449ef805a3 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -1,5 +1,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index ad8678f1b54a..555b20b11dc3 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -6,11 +6,66 @@ #ifndef 
_ASM_RISCV_CACHEFLUSH_H #define _ASM_RISCV_CACHEFLUSH_H -#include +#include -#undef flush_icache_range -#undef flush_icache_user_range -#undef flush_dcache_page +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 + +/* + * The cache doesn't need to be flushed when TLB entries change when + * the cache is mapped to physical memory, not virtual memory + */ +static inline void flush_cache_all(void) +{ +} + +static inline void flush_cache_mm(struct mm_struct *mm) +{ +} + +static inline void flush_cache_dup_mm(struct mm_struct *mm) +{ +} + +static inline void flush_cache_range(struct vm_area_struct *vma, + unsigned long start, + unsigned long end) +{ +} + +static inline void flush_cache_page(struct vm_area_struct *vma, + unsigned long vmaddr, + unsigned long pfn) +{ +} + +static inline void flush_dcache_mmap_lock(struct address_space *mapping) +{ +} + +static inline void flush_dcache_mmap_unlock(struct address_space *mapping) +{ +} + +static inline void flush_icache_page(struct vm_area_struct *vma, + struct page *page) +{ +} + +static inline void flush_cache_vmap(unsigned long start, unsigned long end) +{ +} + +static inline void flush_cache_vunmap(unsigned long start, unsigned long end) +{ +} + +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ + do { \ + memcpy(dst, src, len); \ + flush_icache_user_range(vma, page, vaddr, len); \ + } while (0) +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) static inline void local_flush_icache_all(void) { diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index c207f6634b91..9c66033c3a54 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -21,6 +21,11 @@ */ enum fixed_addresses { FIX_HOLE, +#define FIX_FDT_SIZE SZ_1M + FIX_FDT_END, + FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, + FIX_PTE, + FIX_PMD, FIX_EARLYCON_MEM_BASE, __end_of_fixed_addresses }; diff --git a/arch/riscv/include/asm/hugetlb.h 
b/arch/riscv/include/asm/hugetlb.h new file mode 100644 index 000000000000..728a5db66597 --- /dev/null +++ b/arch/riscv/include/asm/hugetlb.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_HUGETLB_H +#define _ASM_RISCV_HUGETLB_H + +#include +#include + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) { + return 0; +} + +static inline void arch_clear_hugepage_flags(struct page *page) +{ +} + +#endif /* _ASM_RISCV_HUGETLB_H */ diff --git a/arch/riscv/include/asm/image.h b/arch/riscv/include/asm/image.h new file mode 100644 index 000000000000..ef28e106f247 --- /dev/null +++ b/arch/riscv/include/asm/image.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_IMAGE_H +#define __ASM_IMAGE_H + +#define RISCV_IMAGE_MAGIC "RISCV" + +#define RISCV_IMAGE_FLAG_BE_SHIFT 0 +#define RISCV_IMAGE_FLAG_BE_MASK 0x1 + +#define RISCV_IMAGE_FLAG_LE 0 +#define RISCV_IMAGE_FLAG_BE 1 + +#ifdef CONFIG_CPU_BIG_ENDIAN +#error conversion of header fields to LE not yet implemented +#else +#define __HEAD_FLAG_BE RISCV_IMAGE_FLAG_LE +#endif + +#define __HEAD_FLAG(field) (__HEAD_FLAG_##field << \ + RISCV_IMAGE_FLAG_##field##_SHIFT) + +#define __HEAD_FLAGS (__HEAD_FLAG(BE)) + +#define RISCV_HEADER_VERSION_MAJOR 0 +#define RISCV_HEADER_VERSION_MINOR 1 + +#define RISCV_HEADER_VERSION (RISCV_HEADER_VERSION_MAJOR << 16 | \ + RISCV_HEADER_VERSION_MINOR) + +#ifndef __ASSEMBLY__ +/** + * struct riscv_image_header - riscv kernel image header + * @code0: Executable code + * @code1: Executable code + * @text_offset: Image load offset (little endian) + * @image_size: Effective Image size (little endian) + * @flags: kernel flags (little endian) + * @version: version + * @res1: reserved + * @res2: reserved + * @magic: Magic number + * @res3: reserved (will be used for additional RISC-V specific + * header) + * @res4: reserved (will be used for PE COFF offset) + * + * The intention is for this header 
format to be shared between multiple + * architectures to avoid a proliferation of image header formats. + */ + +struct riscv_image_header { + u32 code0; + u32 code1; + u64 text_offset; + u64 image_size; + u64 flags; + u32 version; + u32 res1; + u64 res2; + u64 magic; + u32 res3; + u32 res4; +}; +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_IMAGE_H */ diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 8ddb6c7fedac..707e00a8430b 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -16,6 +16,16 @@ #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE - 1)) +#ifdef CONFIG_64BIT +#define HUGE_MAX_HSTATE 2 +#else +#define HUGE_MAX_HSTATE 1 +#endif +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + /* * PAGE_OFFSET -- the first address of the first page of memory. * When not using MMU this corresponds to the first free page in @@ -115,8 +125,4 @@ extern unsigned long min_low_pfn; #include #include -/* vDSO support */ -/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ -#define __HAVE_ARCH_GATE_AREA - #endif /* _ASM_RISCV_PAGE_H */ diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 45dfac2ac51f..74630989006d 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -70,6 +70,11 @@ static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot) return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); } +static inline unsigned long _pmd_pfn(pmd_t pmd) +{ + return pmd_val(pmd) >> _PAGE_PFN_SHIFT; +} + #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index f7c3f7de15f2..a364aba23d55 100644 --- a/arch/riscv/include/asm/pgtable.h +++ 
b/arch/riscv/include/asm/pgtable.h @@ -59,6 +59,8 @@ #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC) +#define PAGE_TABLE __pgprot(_PAGE_TABLE) + extern pgd_t swapper_pg_dir[]; /* MAP_PRIVATE permissions: xwr (copy-on-write) */ @@ -113,12 +115,16 @@ static inline void pmd_clear(pmd_t *pmdp) set_pmd(pmdp, __pmd(0)); } - static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot) { return __pgd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); } +static inline unsigned long _pgd_pfn(pgd_t pgd) +{ + return pgd_val(pgd) >> _PAGE_PFN_SHIFT; +} + #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) /* Locate an entry in the page global directory */ @@ -250,6 +256,11 @@ static inline pte_t pte_mkspecial(pte_t pte) return __pte(pte_val(pte) | _PAGE_SPECIAL); } +static inline pte_t pte_mkhuge(pte_t pte) +{ + return pte; +} + /* Modify page protection bits */ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { @@ -396,6 +407,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define kern_addr_valid(addr) (1) /* FIXME */ #endif +extern void *dtb_early_va; extern void setup_bootmem(void); extern void paging_init(void); @@ -409,7 +421,7 @@ static inline void pgtable_cache_init(void) #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) /* - * Task size is 0x40000000000 for RV64 or 0xb800000 for RV32. + * Task size is 0x4000000000 for RV64 or 0xb800000 for RV32. * Note that PGDIR_SIZE must evenly divide TASK_SIZE. */ #ifdef CONFIG_64BIT diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 4e46f31072da..0f1ba17e476f 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -11,9 +11,41 @@ #include #include #include +#include __INIT ENTRY(_start) + /* + * Image header expected by Linux boot-loaders. The image header data + * structure is described in asm/image.h. 
+ * Do not modify it without modifying the structure and all bootloaders + * that expect this header format!! + */ + /* jump to start kernel */ + j _start_kernel + /* reserved */ + .word 0 + .balign 8 +#if __riscv_xlen == 64 + /* Image load offset(2MB) from start of RAM */ + .dword 0x200000 +#else + /* Image load offset(4MB) from start of RAM */ + .dword 0x400000 +#endif + /* Effective size of kernel image */ + .dword _end - _start + .dword __HEAD_FLAGS + .word RISCV_HEADER_VERSION + .word 0 + .dword 0 + .asciz RISCV_IMAGE_MAGIC + .word 0 + .balign 4 + .word 0 + +.global _start_kernel +_start_kernel: /* Mask all interrupts */ csrw CSR_SIE, zero csrw CSR_SIP, zero @@ -55,7 +87,9 @@ clear_bss_done: /* Initialize page tables and relocate to virtual addresses */ la sp, init_thread_union + THREAD_SIZE + mv a0, s1 call setup_vm + la a0, early_pg_dir call relocate /* Restore C environment */ @@ -64,25 +98,23 @@ clear_bss_done: la sp, init_thread_union + THREAD_SIZE /* Start the kernel */ - mv a0, s1 call parse_dtb tail start_kernel relocate: /* Relocate return address */ li a1, PAGE_OFFSET - la a0, _start - sub a1, a1, a0 + la a2, _start + sub a1, a1, a2 add ra, ra, a1 /* Point stvec to virtual address of intruction after satp write */ - la a0, 1f - add a0, a0, a1 - csrw CSR_STVEC, a0 + la a2, 1f + add a2, a2, a1 + csrw CSR_STVEC, a2 /* Compute satp for kernel page tables, but don't load it yet */ - la a2, swapper_pg_dir - srl a2, a2, PAGE_SHIFT + srl a2, a0, PAGE_SHIFT li a1, SATP_MODE or a2, a2, a1 @@ -148,6 +180,7 @@ relocate: fence /* Enable virtual memory and relocate to virtual address */ + la a0, swapper_pg_dir call relocate tail smp_callin diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index b92e6831d1ec..a990a6cb184f 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -39,11 +39,9 @@ struct screen_info screen_info = { atomic_t hart_lottery; unsigned long boot_cpu_hartid; -void __init parse_dtb(phys_addr_t dtb_phys) +void 
__init parse_dtb(void) { - void *dtb = __va(dtb_phys); - - if (early_init_dt_scan(dtb)) + if (early_init_dt_scan(dtb_early_va)) return; pr_err("No DTB passed to the kernel\n"); diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index a0084c36d270..c9c21e0d5641 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -92,22 +92,3 @@ const char *arch_vma_name(struct vm_area_struct *vma) return "[vdso]"; return NULL; } - -/* - * Function stubs to prevent linker errors when AT_SYSINFO_EHDR is defined - */ - -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index fc51d3b7876e..74055e1d6f21 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -12,3 +12,5 @@ obj-y += ioremap.o obj-y += cacheflush.o obj-y += context.o obj-y += sifive_l2_cache.o + +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c new file mode 100644 index 000000000000..0d4747e9d5b5 --- /dev/null +++ b/arch/riscv/mm/hugetlbpage.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +int pud_huge(pud_t pud) +{ + return pud_present(pud) && + (pud_val(pud) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); +} + +int pmd_huge(pmd_t pmd) +{ + return pmd_present(pmd) && + (pmd_val(pmd) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); +} + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long ps = memparse(opt, &opt); + + if (ps == HPAGE_SIZE) { + hugetlb_add_hstate(HPAGE_SHIFT - PAGE_SHIFT); + } else if (IS_ENABLED(CONFIG_64BIT) && ps == PUD_SIZE) { + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else { + hugetlb_bad_size(); + pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); + return 0; + } + + return 1; +} +__setup("hugepagesz=", 
setup_hugepagesz); + +#ifdef CONFIG_CONTIG_ALLOC +static __init int gigantic_pages_init(void) +{ + /* With CONTIG_ALLOC, we can allocate gigantic pages at runtime */ + if (IS_ENABLED(CONFIG_64BIT) && !size_to_hstate(1UL << PUD_SHIFT)) + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + return 0; +} +arch_initcall(gigantic_pages_init); +#endif diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 84747d7a1e85..42bf939693d3 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2019 Western Digital Corporation or its affiliates. */ #include @@ -21,6 +22,8 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); +extern char _start[]; + static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, }; @@ -39,13 +42,6 @@ void setup_zero_page(void) memset((void *)empty_zero_page, 0, PAGE_SIZE); } -void __init paging_init(void) -{ - setup_zero_page(); - local_flush_tlb_all(); - zone_sizes_init(); -} - void __init mem_init(void) { #ifdef CONFIG_FLATMEM @@ -84,29 +80,20 @@ disable: initrd_start = 0; initrd_end = 0; } - -void __init free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} #endif /* CONFIG_BLK_DEV_INITRD */ void __init setup_bootmem(void) { struct memblock_region *reg; phys_addr_t mem_size = 0; + phys_addr_t vmlinux_end = __pa(&_end); + phys_addr_t vmlinux_start = __pa(&_start); /* Find the memory region containing the kernel */ for_each_memblock(memory, reg) { - phys_addr_t vmlinux_end = __pa(_end); phys_addr_t end = reg->base + reg->size; if (reg->base <= vmlinux_end && vmlinux_end <= end) { - /* - * Reserve from the start of the region to the end of - * the kernel - */ - memblock_reserve(reg->base, vmlinux_end - reg->base); mem_size = min(reg->size, 
(phys_addr_t)-PAGE_OFFSET); /* @@ -120,6 +107,9 @@ void __init setup_bootmem(void) } BUG_ON(mem_size == 0); + /* Reserve from the start of the kernel to the end of the kernel */ + memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); + set_max_mapnr(PFN_DOWN(mem_size)); max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); @@ -147,17 +137,15 @@ EXPORT_SYMBOL(va_pa_offset); unsigned long pfn_base; EXPORT_SYMBOL(pfn_base); +void *dtb_early_va; pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; -pgd_t trampoline_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); - -#ifndef __PAGETABLE_PMD_FOLDED -#define NUM_SWAPPER_PMDS ((uintptr_t)-PAGE_OFFSET >> PGDIR_SHIFT) -pmd_t swapper_pmd[PTRS_PER_PMD*((-PAGE_OFFSET)/PGDIR_SIZE)] __page_aligned_bss; -pmd_t trampoline_pmd[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); -pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; -#endif - +pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; +static bool mmu_enabled; + +#define MAX_EARLY_MAPPING_SIZE SZ_128M + +pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) { @@ -176,6 +164,156 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) } } +static pte_t *__init get_pte_virt(phys_addr_t pa) +{ + if (mmu_enabled) { + clear_fixmap(FIX_PTE); + return (pte_t *)set_fixmap_offset(FIX_PTE, pa); + } else { + return (pte_t *)((uintptr_t)pa); + } +} + +static phys_addr_t __init alloc_pte(uintptr_t va) +{ + /* + * We only create PMD or PGD early mappings so we + * should never reach here with MMU disabled. 
+ */ + BUG_ON(!mmu_enabled); + + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static void __init create_pte_mapping(pte_t *ptep, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + uintptr_t pte_index = pte_index(va); + + BUG_ON(sz != PAGE_SIZE); + + if (pte_none(ptep[pte_index])) + ptep[pte_index] = pfn_pte(PFN_DOWN(pa), prot); +} + +#ifndef __PAGETABLE_PMD_FOLDED + +pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss; +pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; + +#if MAX_EARLY_MAPPING_SIZE < PGDIR_SIZE +#define NUM_EARLY_PMDS 1UL +#else +#define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE) +#endif +pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE); + +static pmd_t *__init get_pmd_virt(phys_addr_t pa) +{ + if (mmu_enabled) { + clear_fixmap(FIX_PMD); + return (pmd_t *)set_fixmap_offset(FIX_PMD, pa); + } else { + return (pmd_t *)((uintptr_t)pa); + } +} + +static phys_addr_t __init alloc_pmd(uintptr_t va) +{ + uintptr_t pmd_num; + + if (mmu_enabled) + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + + pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT; + BUG_ON(pmd_num >= NUM_EARLY_PMDS); + return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD]; +} + +static void __init create_pmd_mapping(pmd_t *pmdp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + pte_t *ptep; + phys_addr_t pte_phys; + uintptr_t pmd_index = pmd_index(va); + + if (sz == PMD_SIZE) { + if (pmd_none(pmdp[pmd_index])) + pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pa), prot); + return; + } + + if (pmd_none(pmdp[pmd_index])) { + pte_phys = alloc_pte(va); + pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE); + ptep = get_pte_virt(pte_phys); + memset(ptep, 0, PAGE_SIZE); + } else { + pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_index])); + ptep = get_pte_virt(pte_phys); + } + + create_pte_mapping(ptep, va, pa, sz, prot); +} + +#define pgd_next_t pmd_t +#define alloc_pgd_next(__va) alloc_pmd(__va) +#define 
get_pgd_next_virt(__pa) get_pmd_virt(__pa) +#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ + create_pmd_mapping(__nextp, __va, __pa, __sz, __prot) +#define PTE_PARENT_SIZE PMD_SIZE +#define fixmap_pgd_next fixmap_pmd +#else +#define pgd_next_t pte_t +#define alloc_pgd_next(__va) alloc_pte(__va) +#define get_pgd_next_virt(__pa) get_pte_virt(__pa) +#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ + create_pte_mapping(__nextp, __va, __pa, __sz, __prot) +#define PTE_PARENT_SIZE PGDIR_SIZE +#define fixmap_pgd_next fixmap_pte +#endif + +static void __init create_pgd_mapping(pgd_t *pgdp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + pgd_next_t *nextp; + phys_addr_t next_phys; + uintptr_t pgd_index = pgd_index(va); + + if (sz == PGDIR_SIZE) { + if (pgd_val(pgdp[pgd_index]) == 0) + pgdp[pgd_index] = pfn_pgd(PFN_DOWN(pa), prot); + return; + } + + if (pgd_val(pgdp[pgd_index]) == 0) { + next_phys = alloc_pgd_next(va); + pgdp[pgd_index] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE); + nextp = get_pgd_next_virt(next_phys); + memset(nextp, 0, PAGE_SIZE); + } else { + next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_index])); + nextp = get_pgd_next_virt(next_phys); + } + + create_pgd_next_mapping(nextp, va, pa, sz, prot); +} + +static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) +{ + uintptr_t map_size = PAGE_SIZE; + + /* Upgrade to PMD/PGDIR mappings whenever possible */ + if (!(base & (PTE_PARENT_SIZE - 1)) && + !(size & (PTE_PARENT_SIZE - 1))) + map_size = PTE_PARENT_SIZE; + + return map_size; +} + /* * setup_vm() is called from head.S with MMU-off. * @@ -195,55 +333,115 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) "not use absolute addressing." 
#endif -asmlinkage void __init setup_vm(void) +asmlinkage void __init setup_vm(uintptr_t dtb_pa) { - extern char _start; - uintptr_t i; - uintptr_t pa = (uintptr_t) &_start; - pgprot_t prot = __pgprot(pgprot_val(PAGE_KERNEL) | _PAGE_EXEC); + uintptr_t va, end_va; + uintptr_t load_pa = (uintptr_t)(&_start); + uintptr_t load_sz = (uintptr_t)(&_end) - load_pa; + uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE); - va_pa_offset = PAGE_OFFSET - pa; - pfn_base = PFN_DOWN(pa); + va_pa_offset = PAGE_OFFSET - load_pa; + pfn_base = PFN_DOWN(load_pa); + + /* + * Enforce boot alignment requirements of RV32 and + * RV64 by only allowing PMD or PGD mappings. + */ + BUG_ON(map_size == PAGE_SIZE); /* Sanity check alignment and size */ BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); - BUG_ON((pa % (PAGE_SIZE * PTRS_PER_PTE)) != 0); + BUG_ON((load_pa % map_size) != 0); + BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE); + + /* Setup early PGD for fixmap */ + create_pgd_mapping(early_pg_dir, FIXADDR_START, + (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); #ifndef __PAGETABLE_PMD_FOLDED - trampoline_pg_dir[(PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD] = - pfn_pgd(PFN_DOWN((uintptr_t)trampoline_pmd), - __pgprot(_PAGE_TABLE)); - trampoline_pmd[0] = pfn_pmd(PFN_DOWN(pa), prot); - - for (i = 0; i < (-PAGE_OFFSET)/PGDIR_SIZE; ++i) { - size_t o = (PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD + i; - - swapper_pg_dir[o] = - pfn_pgd(PFN_DOWN((uintptr_t)swapper_pmd) + i, - __pgprot(_PAGE_TABLE)); - } - for (i = 0; i < ARRAY_SIZE(swapper_pmd); i++) - swapper_pmd[i] = pfn_pmd(PFN_DOWN(pa + i * PMD_SIZE), prot); - - swapper_pg_dir[(FIXADDR_START >> PGDIR_SHIFT) % PTRS_PER_PGD] = - pfn_pgd(PFN_DOWN((uintptr_t)fixmap_pmd), - __pgprot(_PAGE_TABLE)); - fixmap_pmd[(FIXADDR_START >> PMD_SHIFT) % PTRS_PER_PMD] = - pfn_pmd(PFN_DOWN((uintptr_t)fixmap_pte), - __pgprot(_PAGE_TABLE)); + /* Setup fixmap PMD */ + create_pmd_mapping(fixmap_pmd, FIXADDR_START, + (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE); 
+ /* Setup trampoline PGD and PMD */ + create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET, + (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE); + create_pmd_mapping(trampoline_pmd, PAGE_OFFSET, + load_pa, PMD_SIZE, PAGE_KERNEL_EXEC); #else - trampoline_pg_dir[(PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD] = - pfn_pgd(PFN_DOWN(pa), prot); + /* Setup trampoline PGD */ + create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET, + load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC); +#endif - for (i = 0; i < (-PAGE_OFFSET)/PGDIR_SIZE; ++i) { - size_t o = (PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD + i; + /* + * Setup early PGD covering entire kernel which will allow + * us to reach paging_init(). We map all memory banks later + * in setup_vm_final() below. + */ + end_va = PAGE_OFFSET + load_sz; + for (va = PAGE_OFFSET; va < end_va; va += map_size) + create_pgd_mapping(early_pg_dir, va, + load_pa + (va - PAGE_OFFSET), + map_size, PAGE_KERNEL_EXEC); - swapper_pg_dir[o] = - pfn_pgd(PFN_DOWN(pa + i * PGDIR_SIZE), prot); + /* Create fixed mapping for early FDT parsing */ + end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE; + for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE) + create_pte_mapping(fixmap_pte, va, + dtb_pa + (va - __fix_to_virt(FIX_FDT)), + PAGE_SIZE, PAGE_KERNEL); + + /* Save pointer to DTB for early FDT parsing */ + dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK); +} + +static void __init setup_vm_final(void) +{ + uintptr_t va, map_size; + phys_addr_t pa, start, end; + struct memblock_region *reg; + + /* Set mmu_enabled flag */ + mmu_enabled = true; + + /* Setup swapper PGD for fixmap */ + create_pgd_mapping(swapper_pg_dir, FIXADDR_START, + __pa(fixmap_pgd_next), + PGDIR_SIZE, PAGE_TABLE); + + /* Map all memory banks */ + for_each_memblock(memory, reg) { + start = reg->base; + end = start + reg->size; + + if (start >= end) + break; + if (memblock_is_nomap(reg)) + continue; + if (start <= __pa(PAGE_OFFSET) && + __pa(PAGE_OFFSET) < end) + start = 
__pa(PAGE_OFFSET); + + map_size = best_map_size(start, end - start); + for (pa = start; pa < end; pa += map_size) { + va = (uintptr_t)__va(pa); + create_pgd_mapping(swapper_pg_dir, va, pa, + map_size, PAGE_KERNEL_EXEC); + } } - swapper_pg_dir[(FIXADDR_START >> PGDIR_SHIFT) % PTRS_PER_PGD] = - pfn_pgd(PFN_DOWN((uintptr_t)fixmap_pte), - __pgprot(_PAGE_TABLE)); -#endif + /* Clear fixmap PTE and PMD mappings */ + clear_fixmap(FIX_PTE); + clear_fixmap(FIX_PMD); + + /* Move to swapper page table */ + csr_write(sptbr, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE); + local_flush_tlb_all(); +} + +void __init paging_init(void) +{ + setup_vm_final(); + setup_zero_page(); + zone_sizes_init(); } diff --git a/arch/riscv/mm/sifive_l2_cache.c b/arch/riscv/mm/sifive_l2_cache.c index 4eb64619b3f4..2e637ad71c05 100644 --- a/arch/riscv/mm/sifive_l2_cache.c +++ b/arch/riscv/mm/sifive_l2_cache.c @@ -109,13 +109,14 @@ EXPORT_SYMBOL_GPL(unregister_sifive_l2_error_notifier); static irqreturn_t l2_int_handler(int irq, void *device) { - unsigned int regval, add_h, add_l; + unsigned int add_h, add_l; if (irq == g_irq[DIR_CORR]) { add_h = readl(l2_base + SIFIVE_L2_DIRECCFIX_HIGH); add_l = readl(l2_base + SIFIVE_L2_DIRECCFIX_LOW); pr_err("L2CACHE: DirError @ 0x%08X.%08X\n", add_h, add_l); - regval = readl(l2_base + SIFIVE_L2_DIRECCFIX_COUNT); + /* Reading this register clears the DirError interrupt sig */ + readl(l2_base + SIFIVE_L2_DIRECCFIX_COUNT); atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_CE, "DirECCFix"); } @@ -123,7 +124,8 @@ static irqreturn_t l2_int_handler(int irq, void *device) add_h = readl(l2_base + SIFIVE_L2_DATECCFIX_HIGH); add_l = readl(l2_base + SIFIVE_L2_DATECCFIX_LOW); pr_err("L2CACHE: DataError @ 0x%08X.%08X\n", add_h, add_l); - regval = readl(l2_base + SIFIVE_L2_DATECCFIX_COUNT); + /* Reading this register clears the DataError interrupt sig */ + readl(l2_base + SIFIVE_L2_DATECCFIX_COUNT); atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_CE, 
"DatECCFix"); } @@ -131,7 +133,8 @@ static irqreturn_t l2_int_handler(int irq, void *device) add_h = readl(l2_base + SIFIVE_L2_DATECCFAIL_HIGH); add_l = readl(l2_base + SIFIVE_L2_DATECCFAIL_LOW); pr_err("L2CACHE: DataFail @ 0x%08X.%08X\n", add_h, add_l); - regval = readl(l2_base + SIFIVE_L2_DATECCFAIL_COUNT); + /* Reading this register clears the DataFail interrupt sig */ + readl(l2_base + SIFIVE_L2_DATECCFAIL_COUNT); atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_UE, "DatECCFail"); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1342654e8057..78772870facd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -94,6 +94,7 @@ config X86 select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH select ARCH_WANTS_DYNAMIC_TASK_STRUCT + select ARCH_WANT_HUGE_PMD_SHARE select ARCH_WANTS_THP_SWAP if X86_64 select BUILDTIME_EXTABLE_SORT select CLKEVT_I8253 @@ -307,9 +308,6 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_SUSPEND_POSSIBLE def_bool y -config ARCH_WANT_HUGE_PMD_SHARE - def_bool y - config ARCH_WANT_GENERAL_HUGETLB def_bool y