mm, x86/mm: Untangle address space layout definitions from basic pgtable type definitions

- Untangle the somewhat incestous way of how VMALLOC_START is used all across the kernel, but is, on x86, defined deep inside one of the lowest level page table headers. It doesn't help that vmalloc.h only includes a single asm header: #include <asm/page.h> /* pgprot_t */ So there was no existing cross-arch way to decouple address layout definitions from page.h details. I used this: #ifndef VMALLOC_START # include <asm/vmalloc.h> #endif This way every architecture that wants to simplify page.h can do so. - Also on x86 we had a couple of LDT related inline functions that used the late-stage address space layout positions - but these could be uninlined without real trouble - the end result is cleaner this way as well. Signed-off-by: Ingo Molnar <mingo@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Borislav Petkov <bp@alien8.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Rik van Riel <riel@redhat.com> Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
2019-11-29 08:17:25 +01:00 · 2019-11-29 08:17:25 +01:00 · 186525bd6b
parent 1f059dfdf5
commit 186525bd6b
15 changed files with 183 additions and 154 deletions
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@ -6,6 +6,7 @@
 #include <linux/percpu-defs.h>
 #include <asm/processor.h>
 #include <asm/intel_ds.h>
+#include <asm/pgtable_areas.h>

 #ifdef CONFIG_X86_64

@ -134,15 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
 extern void setup_cpu_entry_areas(void);
 extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);

-/* Single page reserved for the readonly IDT mapping: */
-#define	CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
-#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
-
-#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
-
-#define CPU_ENTRY_AREA_MAP_SIZE			\
-	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
-
 extern struct cpu_entry_area *get_cpu_entry_area(int cpu);

 static inline struct entry_stack *cpu_entry_stack(int cpu)
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@ -69,14 +69,6 @@ struct ldt_struct {
 	int			slot;
 };

-/* This is a multiple of PAGE_SIZE. */
-#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
-
-static inline void *ldt_slot_va(int slot)
-{
-	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
-}
-
 /*
 * Used for LDT copy/destruction.
 */
@ -99,87 +91,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { }
 static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
 #endif

+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+extern void load_mm_ldt(struct mm_struct *mm);
+extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
+#else
 static inline void load_mm_ldt(struct mm_struct *mm)
 {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-	struct ldt_struct *ldt;
-
-	/* READ_ONCE synchronizes with smp_store_release */
-	ldt = READ_ONCE(mm->context.ldt);
-
-	/*
-	 * Any change to mm->context.ldt is followed by an IPI to all
-	 * CPUs with the mm active.  The LDT will not be freed until
-	 * after the IPI is handled by all such CPUs.  This means that,
-	 * if the ldt_struct changes before we return, the values we see
-	 * will be safe, and the new values will be loaded before we run
-	 * any user code.
-	 *
-	 * NB: don't try to convert this to use RCU without extreme care.
-	 * We would still need IRQs off, because we don't want to change
-	 * the local LDT after an IPI loaded a newer value than the one
-	 * that we can see.
-	 */
-
-	if (unlikely(ldt)) {
-		if (static_cpu_has(X86_FEATURE_PTI)) {
-			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
-				/*
-				 * Whoops -- either the new LDT isn't mapped
-				 * (if slot == -1) or is mapped into a bogus
-				 * slot (if slot > 1).
-				 */
-				clear_LDT();
-				return;
-			}
-
-			/*
-			 * If page table isolation is enabled, ldt->entries
-			 * will not be mapped in the userspace pagetables.
-			 * Tell the CPU to access the LDT through the alias
-			 * at ldt_slot_va(ldt->slot).
-			 */
-			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
-		} else {
-			set_ldt(ldt->entries, ldt->nr_entries);
-		}
-	} else {
-		clear_LDT();
-	}
-#else
 	clear_LDT();
-#endif
 }
-
 static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-	/*
-	 * Load the LDT if either the old or new mm had an LDT.
-	 *
-	 * An mm will never go from having an LDT to not having an LDT.  Two
-	 * mms never share an LDT, so we don't gain anything by checking to
-	 * see whether the LDT changed.  There's also no guarantee that
-	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
-	 * then prev->context.ldt will also be non-NULL.
-	 *
-	 * If we really cared, we could optimize the case where prev == next
-	 * and we're exiting lazy mode.  Most of the time, if this happens,
-	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
-	 * used by legacy code and emulators where we don't need this level of
-	 * performance.
-	 *
-	 * This uses | instead of || because it generates better code.
-	 */
-	if (unlikely((unsigned long)prev->context.ldt |
-		     (unsigned long)next->context.ldt))
-		load_mm_ldt(next);
-#endif
-
 	DEBUG_LOCKS_WARN_ON(preemptible());
 }
+#endif

-void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);

 /*
 * Init a new mm.  Used on mm copies, like at fork()
--- a/arch/x86/include/asm/pgtable_32_areas.h
+++ b/arch/x86/include/asm/pgtable_32_areas.h
@ -0,0 +1,53 @@
+#ifndef _ASM_X86_PGTABLE_32_AREAS_H
+#define _ASM_X86_PGTABLE_32_AREAS_H
+
+#include <asm/cpu_entry_area.h>
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts.  That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET	(8 * 1024 * 1024)
+
+#ifndef __ASSEMBLY__
+extern bool __vmalloc_start_set; /* set once high_memory is set */
+#endif
+
+#define VMALLOC_START	((unsigned long)high_memory + VMALLOC_OFFSET)
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+
+#define CPU_ENTRY_AREA_PAGES		(NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE))
+
+/* The +1 is for the readonly IDT page: */
+#define CPU_ENTRY_AREA_BASE	\
+	((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
+
+#define LDT_BASE_ADDR		\
+	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
+
+#define LDT_END_ADDR		(LDT_BASE_ADDR + PMD_SIZE)
+
+#define PKMAP_BASE		\
+	((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
+
+#ifdef CONFIG_HIGHMEM
+# define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
+#else
+# define VMALLOC_END	(LDT_BASE_ADDR - 2 * PAGE_SIZE)
+#endif
+
+#define MODULES_VADDR	VMALLOC_START
+#define MODULES_END	VMALLOC_END
+#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
+
+#define MAXMEM	(VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
+
+#endif /* _ASM_X86_PGTABLE_32_AREAS_H */
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PGTABLE_32_DEFS_H
-#define _ASM_X86_PGTABLE_32_DEFS_H
+#ifndef _ASM_X86_PGTABLE_32_TYPES_H
+#define _ASM_X86_PGTABLE_32_TYPES_H

 /*
 * The Linux x86 paging architecture is 'compile-time dual-mode', it
@ -20,55 +20,4 @@
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))

-/* Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 8MB value just means that there will be a 8MB "hole" after the
- * physical memory until the kernel virtual memory starts.  That means that
- * any out-of-bounds memory accesses will hopefully be caught.
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
- * area for the same reason. ;)
- */
-#define VMALLOC_OFFSET	(8 * 1024 * 1024)
-
-#ifndef __ASSEMBLY__
-extern bool __vmalloc_start_set; /* set once high_memory is set */
-#endif
-
-#define VMALLOC_START	((unsigned long)high_memory + VMALLOC_OFFSET)
-#ifdef CONFIG_X86_PAE
-#define LAST_PKMAP 512
-#else
-#define LAST_PKMAP 1024
-#endif
-
-/*
- * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE.
- * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c
- * to avoid include recursion hell.
- */
-#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 43)
-
-/* The +1 is for the readonly IDT page: */
-#define CPU_ENTRY_AREA_BASE	\
-	((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
-
-#define LDT_BASE_ADDR		\
-	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
-
-#define LDT_END_ADDR		(LDT_BASE_ADDR + PMD_SIZE)
-
-#define PKMAP_BASE		\
-	((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
-
-#ifdef CONFIG_HIGHMEM
-# define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
-#else
-# define VMALLOC_END	(LDT_BASE_ADDR - 2 * PAGE_SIZE)
-#endif
-
-#define MODULES_VADDR	VMALLOC_START
-#define MODULES_END	VMALLOC_END
-#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
-
-#define MAXMEM	(VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
-
-#endif /* _ASM_X86_PGTABLE_32_DEFS_H */
+#endif /* _ASM_X86_PGTABLE_32_TYPES_H */
--- a/arch/x86/include/asm/pgtable_areas.h
+++ b/arch/x86/include/asm/pgtable_areas.h
@ -0,0 +1,16 @@
+#ifndef _ASM_X86_PGTABLE_AREAS_H
+#define _ASM_X86_PGTABLE_AREAS_H
+
+#ifdef CONFIG_X86_32
+# include <asm/pgtable_32_areas.h>
+#endif
+
+/* Single page reserved for the readonly IDT mapping: */
+#define CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE		(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+
+#endif /* _ASM_X86_PGTABLE_AREAS_H */
--- a/arch/x86/include/asm/vmalloc.h
+++ b/arch/x86/include/asm/vmalloc.h
@ -1,4 +1,6 @@
 #ifndef _ASM_X86_VMALLOC_H
 #define _ASM_X86_VMALLOC_H

+#include <asm/pgtable_areas.h>
+
 #endif /* _ASM_X86_VMALLOC_H */
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@ -28,6 +28,89 @@
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
+#include <asm/pgtable_areas.h>
+
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+}
+
+void load_mm_ldt(struct mm_struct *mm)
+{
+	struct ldt_struct *ldt;
+
+	/* READ_ONCE synchronizes with smp_store_release */
+	ldt = READ_ONCE(mm->context.ldt);
+
+	/*
+	 * Any change to mm->context.ldt is followed by an IPI to all
+	 * CPUs with the mm active.  The LDT will not be freed until
+	 * after the IPI is handled by all such CPUs.  This means that,
+	 * if the ldt_struct changes before we return, the values we see
+	 * will be safe, and the new values will be loaded before we run
+	 * any user code.
+	 *
+	 * NB: don't try to convert this to use RCU without extreme care.
+	 * We would still need IRQs off, because we don't want to change
+	 * the local LDT after an IPI loaded a newer value than the one
+	 * that we can see.
+	 */
+
+	if (unlikely(ldt)) {
+		if (static_cpu_has(X86_FEATURE_PTI)) {
+			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+				/*
+				 * Whoops -- either the new LDT isn't mapped
+				 * (if slot == -1) or is mapped into a bogus
+				 * slot (if slot > 1).
+				 */
+				clear_LDT();
+				return;
+			}
+
+			/*
+			 * If page table isolation is enabled, ldt->entries
+			 * will not be mapped in the userspace pagetables.
+			 * Tell the CPU to access the LDT through the alias
+			 * at ldt_slot_va(ldt->slot).
+			 */
+			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+		} else {
+			set_ldt(ldt->entries, ldt->nr_entries);
+		}
+	} else {
+		clear_LDT();
+	}
+}
+
+void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+{
+	/*
+	 * Load the LDT if either the old or new mm had an LDT.
+	 *
+	 * An mm will never go from having an LDT to not having an LDT.  Two
+	 * mms never share an LDT, so we don't gain anything by checking to
+	 * see whether the LDT changed.  There's also no guarantee that
+	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
+	 * then prev->context.ldt will also be non-NULL.
+	 *
+	 * If we really cared, we could optimize the case where prev == next
+	 * and we're exiting lazy mode.  Most of the time, if this happens,
+	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
+	 * used by legacy code and emulators where we don't need this level of
+	 * performance.
+	 *
+	 * This uses | instead of || because it generates better code.
+	 */
+	if (unlikely((unsigned long)prev->context.ldt |
+		     (unsigned long)next->context.ldt))
+		load_mm_ldt(next);
+
+	DEBUG_LOCKS_WARN_ON(preemptible());
+}

 static void refresh_ldt_segments(void)
 {
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@ -41,6 +41,7 @@
 #include <asm/proto.h>
 #include <asm/unwind.h>
 #include <asm/vsyscall.h>
+#include <linux/vmalloc.h>

 /*
 * max_low_pfn_mapped: highest directly mapped pfn < 4 GB
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@ -29,6 +29,7 @@
 #include <asm/efi.h>			/* efi_recover_from_page_fault()*/
 #include <asm/desc.h>			/* store_idt(), ...		*/
 #include <asm/cpu_entry_area.h>		/* exception stack		*/
+#include <asm/pgtable_areas.h>		/* VMALLOC_START, ...		*/

 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@ -52,6 +52,7 @@
 #include <asm/page_types.h>
 #include <asm/cpu_entry_area.h>
 #include <asm/init.h>
+#include <asm/pgtable_areas.h>

 #include "mm_internal.h"

--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@ -18,6 +18,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/io.h>
+#include <linux/vmalloc.h>

 unsigned int __VMALLOC_RESERVE = 128 << 20;

--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@ -5,6 +5,7 @@
 #include <linux/mm.h>

 #include <asm/page.h>
+#include <linux/vmalloc.h>

 #include "physaddr.h"

--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@ -625,24 +625,19 @@ unsigned long vmalloc_to_pfn(const void *addr);
 * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there
 * is no special casing required.
 */
-static inline bool is_vmalloc_addr(const void *x)
-{
-#ifdef CONFIG_MMU
-	unsigned long addr = (unsigned long)x;
-
-	return addr >= VMALLOC_START && addr < VMALLOC_END;
-#else
-	return false;
-#endif
-}

 #ifndef is_ioremap_addr
 #define is_ioremap_addr(x) is_vmalloc_addr(x)
 #endif

 #ifdef CONFIG_MMU
+extern bool is_vmalloc_addr(const void *x);
 extern int is_vmalloc_or_module_addr(const void *x);
 #else
+static inline bool is_vmalloc_addr(const void *x)
+{
+	return false;
+}
 static inline int is_vmalloc_or_module_addr(const void *x)
 {
 	return 0;
--- a/mm/highmem.c
+++ b/mm/highmem.c
@ -29,7 +29,7 @@
 #include <linux/highmem.h>
 #include <linux/kgdb.h>
 #include <asm/tlbflush.h>
-
+#include <linux/vmalloc.h>

 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
 DEFINE_PER_CPU(int, __kmap_atomic_idx);
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@ -41,6 +41,14 @@

 #include "internal.h"

+bool is_vmalloc_addr(const void *x)
+{
+	unsigned long addr = (unsigned long)x;
+
+	return addr >= VMALLOC_START && addr < VMALLOC_END;
+}
+EXPORT_SYMBOL(is_vmalloc_addr);
+
 struct vfree_deferred {
 	struct llist_head list;
 	struct work_struct wq;