diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index a5ecc9c33e92..7f3eba08e7de 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -172,6 +172,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_all(); } -extern void zap_low_mappings(void); +extern void zap_low_mappings(bool early); #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7c80007ea5f7..2fecda69ee64 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -873,7 +873,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) err = do_boot_cpu(apicid, cpu); - zap_low_mappings(); + zap_low_mappings(false); low_mappings = 0; #else err = do_boot_cpu(apicid, cpu); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 949708d7a481..9ff3c0816d15 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -564,7 +564,7 @@ static inline void save_pg_dir(void) } #endif /* !CONFIG_ACPI_SLEEP */ -void zap_low_mappings(void) +void zap_low_mappings(bool early) { int i; @@ -581,7 +581,11 @@ void zap_low_mappings(void) set_pgd(swapper_pg_dir+i, __pgd(0)); #endif } - flush_tlb_all(); + + if (early) + __flush_tlb(); + else + flush_tlb_all(); } pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); @@ -956,7 +960,7 @@ void __init mem_init(void) test_wp_bit(); save_pg_dir(); - zap_low_mappings(); + zap_low_mappings(true); } #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0bbc15f54536..3760e7c5de02 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -85,6 +85,9 @@ struct vm_area_struct; __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_NOMEMALLOC) +/* Control slab gfp mask during early boot */ +#define SLAB_GFP_BOOT_MASK __GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS) + /* Control allocation constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 7339c7bf7331..13f126c89ae8 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -18,7 +18,19 @@ struct page_cgroup { }; void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat); -void __init page_cgroup_init(void); + +#ifdef CONFIG_SPARSEMEM +static inline void __init page_cgroup_init_flatmem(void) +{ +} +extern void __init page_cgroup_init(void); +#else +void __init page_cgroup_init_flatmem(void); +static inline void __init page_cgroup_init(void) +{ +} +#endif + struct page_cgroup *lookup_page_cgroup(struct page *page); enum { @@ -87,6 +99,10 @@ static inline void page_cgroup_init(void) { } +static inline void __init page_cgroup_init_flatmem(void) +{ +} + #endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP diff --git a/include/linux/slab.h b/include/linux/slab.h index 48803064cedf..219b8fb4651d 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -319,4 +319,6 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node) return kmalloc_node(size, flags | __GFP_ZERO, node); } +void __init kmem_cache_init_late(void); + #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h index 0ec00b39d006..bb5368df4be8 100644 --- a/include/linux/slob_def.h +++ b/include/linux/slob_def.h @@ -34,4 +34,9 @@ static __always_inline void *__kmalloc(size_t size, gfp_t flags) return kmalloc(size, flags); } +static inline void kmem_cache_init_late(void) +{ + /* Nothing to do */ +} + #endif /* __LINUX_SLOB_DEF_H */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index be5d40c43bd2..4dcbc2c71491 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -302,4 +302,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) } #endif +void __init kmem_cache_init_late(void); + #endif /* _LINUX_SLUB_DEF_H */ diff --git a/init/main.c b/init/main.c index 5616661eac01..f6204f712e7c 100644 --- a/init/main.c +++ b/init/main.c @@ -539,6 +539,11 @@ void __init __weak thread_info_cache_init(void) */ static void __init mm_init(void) { + /* + * page_cgroup requires countinous pages as memmap + * and it's bigger than MAX_ORDER unless SPARSEMEM. + */ + page_cgroup_init_flatmem(); mem_init(); kmem_cache_init(); vmalloc_init(); @@ -635,6 +640,7 @@ asmlinkage void __init start_kernel(void) "enabled early\n"); early_boot_irqs_on(); local_irq_enable(); + kmem_cache_init_late(); /* * HACK ALERT! This is early. We're enabling the console before diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 104578541230..065205bdd920 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -45,7 +45,7 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc) #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) static void __init init_irq_default_affinity(void) { - alloc_bootmem_cpumask_var(&irq_default_affinity); + alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); cpumask_setall(irq_default_affinity); } #else diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 3dd4a909a1de..11a8a10a3909 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -47,8 +47,6 @@ static int __init alloc_node_page_cgroup(int nid) struct page_cgroup *base, *pc; unsigned long table_size; unsigned long start_pfn, nr_pages, index; - struct page *page; - unsigned int order; start_pfn = NODE_DATA(nid)->node_start_pfn; nr_pages = NODE_DATA(nid)->node_spanned_pages; @@ -57,13 +55,11 @@ static int __init alloc_node_page_cgroup(int nid) return 0; table_size = sizeof(struct page_cgroup) * nr_pages; - order = get_order(table_size); - page = alloc_pages_node(nid, GFP_NOWAIT | __GFP_ZERO, order); - if (!page) - page = alloc_pages_node(-1, GFP_NOWAIT | __GFP_ZERO, order); - if (!page) + + base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), + table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + if (!base) return -ENOMEM; - base = page_address(page); for (index = 0; index < nr_pages; index++) { pc = base + index; __init_page_cgroup(pc, start_pfn + index); @@ -73,7 +69,7 @@ static int __init alloc_node_page_cgroup(int nid) return 0; } -void __init page_cgroup_init(void) +void __init page_cgroup_init_flatmem(void) { int nid, fail; @@ -117,16 +113,11 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn) if (!section->page_cgroup) { nid = page_to_nid(pfn_to_page(pfn)); table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; - if (slab_is_available()) { - base = kmalloc_node(table_size, - GFP_KERNEL | __GFP_NOWARN, nid); - if (!base) - base = vmalloc_node(table_size, nid); - } else { - base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), - table_size, - PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); - } + VM_BUG_ON(!slab_is_available()); + base = kmalloc_node(table_size, + GFP_KERNEL | __GFP_NOWARN, nid); + if (!base) + base = vmalloc_node(table_size, nid); } else { /* * We don't have to allocate page_cgroup again, but diff --git a/mm/slab.c b/mm/slab.c index f46b65d124e5..18e3164de09a 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -303,6 +303,12 @@ struct kmem_list3 { int free_touched; /* updated without locking */ }; +/* + * The slab allocator is initialized with interrupts disabled. Therefore, make + * sure early boot allocations don't accidentally enable interrupts. + */ +static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK; + /* * Need this for bootstrapping a per node allocator. */ @@ -753,6 +759,7 @@ static enum { NONE, PARTIAL_AC, PARTIAL_L3, + EARLY, FULL } g_cpucache_up; @@ -761,7 +768,7 @@ static enum { */ int slab_is_available(void) { - return g_cpucache_up == FULL; + return g_cpucache_up >= EARLY; } static DEFINE_PER_CPU(struct delayed_work, reap_work); @@ -1625,19 +1632,27 @@ void __init kmem_cache_init(void) } } - /* 6) resize the head arrays to their final sizes */ - { - struct kmem_cache *cachep; - mutex_lock(&cache_chain_mutex); - list_for_each_entry(cachep, &cache_chain, next) - if (enable_cpucache(cachep, GFP_NOWAIT)) - BUG(); - mutex_unlock(&cache_chain_mutex); - } + g_cpucache_up = EARLY; /* Annotate slab for lockdep -- annotate the malloc caches */ init_lock_keys(); +} +void __init kmem_cache_init_late(void) +{ + struct kmem_cache *cachep; + + /* + * Interrupts are enabled now so all GFP allocations are safe. + */ + slab_gfp_mask = __GFP_BITS_MASK; + + /* 6) resize the head arrays to their final sizes */ + mutex_lock(&cache_chain_mutex); + list_for_each_entry(cachep, &cache_chain, next) + if (enable_cpucache(cachep, GFP_NOWAIT)) + BUG(); + mutex_unlock(&cache_chain_mutex); /* Done! */ g_cpucache_up = FULL; @@ -2102,7 +2117,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) for_each_online_node(node) { cachep->nodelists[node] = kmalloc_node(sizeof(struct kmem_list3), - GFP_KERNEL, node); + gfp, node); BUG_ON(!cachep->nodelists[node]); kmem_list3_init(cachep->nodelists[node]); } @@ -3354,6 +3369,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, unsigned long save_flags; void *ptr; + flags &= slab_gfp_mask; + lockdep_trace_alloc(flags); if (slab_should_failslab(cachep, flags)) @@ -3434,6 +3451,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) unsigned long save_flags; void *objp; + flags &= slab_gfp_mask; + lockdep_trace_alloc(flags); if (slab_should_failslab(cachep, flags)) diff --git a/mm/slub.c b/mm/slub.c index 3964d3ce4c15..30354bfeb43d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -178,6 +178,12 @@ static enum { SYSFS /* Sysfs up */ } slab_state = DOWN; +/* + * The slab allocator is initialized with interrupts disabled. Therefore, make + * sure early boot allocations don't accidentally enable interrupts. + */ +static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK; + /* A list of all slab caches on the system */ static DECLARE_RWSEM(slub_lock); static LIST_HEAD(slab_caches); @@ -1595,6 +1601,8 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, unsigned long flags; unsigned int objsize; + gfpflags &= slab_gfp_mask; + lockdep_trace_alloc(gfpflags); might_sleep_if(gfpflags & __GFP_WAIT); @@ -3104,6 +3112,14 @@ void __init kmem_cache_init(void) nr_cpu_ids, nr_node_ids); } +void __init kmem_cache_init_late(void) +{ + /* + * Interrupts are enabled now so all GFP allocations are safe. + */ + slab_gfp_mask = __GFP_BITS_MASK; +} + /* * Find a mergeable slab cache */