diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 4e9828cda7a2..34a28227068d 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -319,6 +319,7 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 static inline void remove_memory(int nid, u64 start, u64 size) {}
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
+extern void __ref free_area_init_core_hotplug(int nid);
 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a55f5389c491..a9e733b5fb76 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2015,7 +2015,7 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
 
 extern void __init pagecache_init(void);
 extern void free_area_init(unsigned long * zones_size);
-extern void free_area_init_node(int nid, unsigned long * zones_size,
+extern void __init free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
 extern void free_initmem(void);
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 4eb6e824a80c..9eea6e809a4e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -982,8 +982,6 @@ static void reset_node_present_pages(pg_data_t *pgdat)
 static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 {
 	struct pglist_data *pgdat;
-	unsigned long zones_size[MAX_NR_ZONES] = {0};
-	unsigned long zholes_size[MAX_NR_ZONES] = {0};
 	unsigned long start_pfn = PFN_DOWN(start);
 
 	pgdat = NODE_DATA(nid);
@@ -1006,8 +1004,11 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 
 	/* we can use NODE_DATA(nid) from here */
 
+	pgdat->node_id = nid;
+	pgdat->node_start_pfn = start_pfn;
+
 	/* init node's zones as empty zones, we don't have any present pages.*/
-	free_area_init_node(nid, zones_size, start_pfn, zholes_size);
+	free_area_init_core_hotplug(nid);
 	pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
 
 	/*
@@ -1016,19 +1017,12 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 	 */
 	build_all_zonelists(pgdat);
 
-	/*
-	 * zone->managed_pages is set to an approximate value in
-	 * free_area_init_core(), which will cause
-	 * /sys/device/system/node/nodeX/meminfo has wrong data.
-	 * So reset it to 0 before any memory is onlined.
-	 */
-	reset_node_managed_pages(pgdat);
-
 	/*
 	 * When memory is hot-added, all the memory is in offline state. So
-	 * clear all zones' present_pages because they will be updated in
-	 * online_pages() and offline_pages().
+	 * clear all zones' managed_pages and present_pages here; present_pages
+	 * will be updated in online_pages() and offline_pages().
 	 */
+	reset_node_managed_pages(pgdat);
 	reset_node_present_pages(pgdat);
 
 	return pgdat;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5b939bd1bff9..c677c1506d73 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6140,7 +6140,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 
 /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
-void __meminit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
 	unsigned int order;
 
@@ -6168,13 +6168,13 @@ void __meminit set_pageblock_order(void)
  * include/linux/pageblock-flags.h for the values of pageblock_order based on
  * the kernel config
  */
-void __meminit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
 }
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
-static unsigned long __meminit calc_memmap_size(unsigned long spanned_pages,
+static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
 						unsigned long present_pages)
 {
 	unsigned long pages = spanned_pages;
@@ -6225,19 +6225,8 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat)
 static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
 #endif
 
-/*
- * Set up the zone data structures:
- *   - mark all pages reserved
- *   - mark all memory queues empty
- *   - clear the memory bitmaps
- *
- * NOTE: pgdat should get zeroed by caller.
- */
-static void __meminit free_area_init_core(struct pglist_data *pgdat)
+static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 {
-	enum zone_type j;
-	int nid = pgdat->node_id;
-
 	pgdat_resize_init(pgdat);
 
 	pgdat_init_numabalancing(pgdat);
@@ -6250,7 +6239,54 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
 	pgdat_page_ext_init(pgdat);
 	spin_lock_init(&pgdat->lru_lock);
 	lruvec_init(node_lruvec(pgdat));
+}
 
+static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
+							unsigned long remaining_pages)
+{
+	zone->managed_pages = remaining_pages;
+	zone_set_nid(zone, nid);
+	zone->name = zone_names[idx];
+	zone->zone_pgdat = NODE_DATA(nid);
+	spin_lock_init(&zone->lock);
+	zone_seqlock_init(zone);
+	zone_pcp_init(zone);
+}
+
+/*
+ * Set up the node and zone data structures of a hot-added node:
+ * - init pgdat internals
+ * - init all zones belonging to this node, with no managed pages
+ *
+ * NOTE: this function is only called during memory hotplug
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __ref free_area_init_core_hotplug(int nid)
+{
+	enum zone_type z;
+	pg_data_t *pgdat = NODE_DATA(nid);
+
+	pgdat_init_internals(pgdat);
+	for (z = 0; z < MAX_NR_ZONES; z++)
+		zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
+}
+#endif
+
+/*
+ * Set up the zone data structures:
+ *   - mark all pages reserved
+ *   - mark all memory queues empty
+ *   - clear the memory bitmaps
+ *
+ * NOTE: pgdat should get zeroed by caller.
+ * NOTE: this function is only called during early init.
+ */
+static void __init free_area_init_core(struct pglist_data *pgdat)
+{
+	enum zone_type j;
+	int nid = pgdat->node_id;
+
+	pgdat_init_internals(pgdat);
 	pgdat->per_cpu_nodestats = &boot_nodestats;
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -6298,13 +6334,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
 		 * when the bootmem allocator frees pages into the buddy system.
 		 * And all highmem pages will be managed by the buddy system.
 		 */
-		zone->managed_pages = freesize;
-		zone_set_nid(zone, nid);
-		zone->name = zone_names[j];
-		zone->zone_pgdat = pgdat;
-		spin_lock_init(&zone->lock);
-		zone_seqlock_init(zone);
-		zone_pcp_init(zone);
+		zone_init_internals(zone, j, nid, freesize);
 
 		if (!size)
 			continue;
@@ -6379,7 +6409,7 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
 static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
 #endif
 
-void __meminit free_area_init_node(int nid, unsigned long *zones_size,
+void __init free_area_init_node(int nid, unsigned long *zones_size,
 				   unsigned long node_start_pfn,
 				   unsigned long *zholes_size)
 {
@@ -6418,7 +6448,7 @@ void __meminit free_area_init_node(int nid, unsigned long *zones_size,
  * may be accessed (for example page_to_pfn() on some configuration accesses
  * flags). We must explicitly zero those struct pages.
  */
-void __meminit zero_resv_unavail(void)
+void __init zero_resv_unavail(void)
 {
 	phys_addr_t start, end;
 	unsigned long pfn;