diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index c02dc9809ca5..b3b1e9c1770a 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -552,27 +552,18 @@ void __init do_init_bootmem(void) /* Add all physical memory to the bootmem map, mark each area * present. */ - for (i=0; i < lmb.memory.cnt; i++) { - unsigned long base, size; - unsigned long start_pfn, end_pfn; - - base = lmb.memory.region[i].base; - size = lmb.memory.region[i].size; - - start_pfn = base >> PAGE_SHIFT; - end_pfn = start_pfn + (size >> PAGE_SHIFT); - memory_present(0, start_pfn, end_pfn); - - free_bootmem(base, size); - } + for (i=0; i < lmb.memory.cnt; i++) + free_bootmem(lmb_start_pfn(&lmb.memory, i), + lmb_size_bytes(&lmb.memory, i)); /* reserve the sections we're already using */ - for (i=0; i < lmb.reserved.cnt; i++) { - unsigned long base = lmb.reserved.region[i].base; - unsigned long size = lmb.reserved.region[i].size; + for (i=0; i < lmb.reserved.cnt; i++) + reserve_bootmem(lmb_start_pfn(&lmb.reserved, i), + lmb_size_bytes(&lmb.reserved, i)); - reserve_bootmem(base, size); - } + for (i=0; i < lmb.memory.cnt; i++) + memory_present(0, lmb_start_pfn(&lmb.memory, i), + lmb_end_pfn(&lmb.memory, i)); } /* diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c index c3116f0d788c..cb864b8f2750 100644 --- a/arch/ppc64/mm/numa.c +++ b/arch/ppc64/mm/numa.c @@ -440,8 +440,6 @@ new_range: for (i = start ; i < (start+size); i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = numa_domain; - memory_present(numa_domain, start >> PAGE_SHIFT, - (start + size) >> PAGE_SHIFT); if (--ranges) goto new_range; @@ -483,7 +481,6 @@ static void __init setup_nonnuma(void) for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; - memory_present(0, 0, init_node_data[0].node_end_pfn); } static void __init dump_numa_topology(void) @@ -695,6 +692,46 @@ new_range: size); } } + /* + * This loop may look famaliar, but we have to do it again + * after marking our reserved memory to mark memory present + * for sparsemem. + */ + addr_cells = get_mem_addr_cells(); + size_cells = get_mem_size_cells(); + memory = NULL; + while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + unsigned long mem_start, mem_size; + int numa_domain, ranges; + unsigned int *memcell_buf; + unsigned int len; + + memcell_buf = (unsigned int *)get_property(memory, "reg", &len); + if (!memcell_buf || len <= 0) + continue; + + ranges = memory->n_addrs; /* ranges in cell */ +new_range2: + mem_start = read_n_cells(addr_cells, &memcell_buf); + mem_size = read_n_cells(size_cells, &memcell_buf); + if (numa_enabled) { + numa_domain = of_node_numa_domain(memory); + if (numa_domain >= MAX_NUMNODES) + numa_domain = 0; + } else + numa_domain = 0; + + if (numa_domain != nid) + continue; + + mem_size = numa_enforce_memory_limit(mem_start, mem_size); + memory_present(numa_domain, mem_start >> PAGE_SHIFT, + (mem_start + mem_size) >> PAGE_SHIFT); + + if (--ranges) /* process all ranges in cell */ + goto new_range2; + } + } } diff --git a/include/asm-ppc64/lmb.h b/include/asm-ppc64/lmb.h index cb368bf0f264..de91e034bd98 100644 --- a/include/asm-ppc64/lmb.h +++ b/include/asm-ppc64/lmb.h @@ -56,4 +56,26 @@ extern void lmb_dump_all(void); extern unsigned long io_hole_start; +static inline unsigned long +lmb_size_bytes(struct lmb_region *type, unsigned long region_nr) +{ + return type->region[region_nr].size; +} +static inline unsigned long +lmb_size_pages(struct lmb_region *type, unsigned long region_nr) +{ + return lmb_size_bytes(type, region_nr) >> PAGE_SHIFT; +} +static inline unsigned long +lmb_start_pfn(struct lmb_region *type, unsigned long region_nr) +{ + return type->region[region_nr].base >> PAGE_SHIFT; +} +static inline unsigned long +lmb_end_pfn(struct lmb_region *type, unsigned long region_nr) +{ + return lmb_start_pfn(type, region_nr) + + lmb_size_pages(type, region_nr); +} + #endif /* _PPC64_LMB_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6c90461ed99f..b97054bbc394 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -487,6 +487,28 @@ struct mem_section { unsigned long section_mem_map; }; +#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME +/* + * Should we ever require GCC 4 or later then the flat array scheme + * can be eliminated and a uniform solution for EXTREME and !EXTREME can + * be arrived at. + */ +#define SECTION_ROOT_SHIFT (PAGE_SHIFT-3) +#define SECTION_ROOT_MASK ((1UL<> SECTION_ROOT_SHIFT) +#define NR_SECTION_ROOTS (NR_MEM_SECTIONS >> SECTION_ROOT_SHIFT) + +extern struct mem_section *mem_section[NR_SECTION_ROOTS]; + +static inline struct mem_section *__nr_to_section(unsigned long nr) +{ + if (!mem_section[SECTION_TO_ROOT(nr)]) + return NULL; + return &mem_section[SECTION_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; +} + +#else + extern struct mem_section mem_section[NR_MEM_SECTIONS]; static inline struct mem_section *__nr_to_section(unsigned long nr) @@ -494,6 +516,10 @@ static inline struct mem_section *__nr_to_section(unsigned long nr) return &mem_section[nr]; } +#define sparse_index_init(_sec, _nid) do {} while (0) + +#endif + /* * We use the lower bits of the mem_map pointer to store * a little bit of information. There should be at least @@ -513,12 +539,12 @@ static inline struct page *__section_mem_map_addr(struct mem_section *section) static inline int valid_section(struct mem_section *section) { - return (section->section_mem_map & SECTION_MARKED_PRESENT); + return (section && (section->section_mem_map & SECTION_MARKED_PRESENT)); } static inline int section_has_mem_map(struct mem_section *section) { - return (section->section_mem_map & SECTION_HAS_MEM_MAP); + return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP)); } static inline int valid_section_nr(unsigned long nr) diff --git a/mm/Kconfig b/mm/Kconfig index cd379936cac6..fc644c5c065d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -89,3 +89,12 @@ config NEED_MULTIPLE_NODES config HAVE_MEMORY_PRESENT def_bool y depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM + +# +# Architectecture platforms which require a two level mem_section in SPARSEMEM +# must select this option. This is usually for architecture platforms with +# an extremely sparse physical address space. +# +config ARCH_SPARSEMEM_EXTREME + def_bool n + depends on SPARSEMEM && 64BIT diff --git a/mm/sparse.c b/mm/sparse.c index b54e304df4a7..b2b456bf0a5d 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -13,7 +13,26 @@ * * 1) mem_section - memory sections, mem_map's for valid memory */ -struct mem_section mem_section[NR_MEM_SECTIONS]; +#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME +struct mem_section *mem_section[NR_SECTION_ROOTS] + ____cacheline_maxaligned_in_smp; + +static void sparse_index_init(unsigned long section, int nid) +{ + unsigned long root = SECTION_TO_ROOT(section); + + if (mem_section[root]) + return; + mem_section[root] = alloc_bootmem_node(NODE_DATA(nid), PAGE_SIZE); + if (mem_section[root]) + memset(mem_section[root], 0, PAGE_SIZE); + else + panic("memory_present: NO MEMORY\n"); +} +#else +struct mem_section mem_section[NR_MEM_SECTIONS] + ____cacheline_maxaligned_in_smp; +#endif EXPORT_SYMBOL(mem_section); /* Record a memory area against a node. */ @@ -24,8 +43,13 @@ void memory_present(int nid, unsigned long start, unsigned long end) start &= PAGE_SECTION_MASK; for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { unsigned long section = pfn_to_section_nr(pfn); - if (!mem_section[section].section_mem_map) - mem_section[section].section_mem_map = SECTION_MARKED_PRESENT; + struct mem_section *ms; + + sparse_index_init(section, nid); + + ms = __nr_to_section(section); + if (!ms->section_mem_map) + ms->section_mem_map = SECTION_MARKED_PRESENT; } } @@ -85,6 +109,7 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum) { struct page *map; int nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); + struct mem_section *ms = __nr_to_section(pnum); map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); if (map) @@ -96,7 +121,7 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum) return map; printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__); - mem_section[pnum].section_mem_map = 0; + ms->section_mem_map = 0; return NULL; } @@ -114,8 +139,9 @@ void sparse_init(void) continue; map = sparse_early_mem_map_alloc(pnum); - if (map) - sparse_init_one_section(&mem_section[pnum], pnum, map); + if (!map) + continue; + sparse_init_one_section(__nr_to_section(pnum), pnum, map); } }