x86-64, NUMA: Kill numa_nodes[]

numa_nodes[] doesn't carry any information which isn't present in
numa_meminfo.  Each entry is simply the min/max range of all the memblks
for the node.  This is not only redundant but also inaccurate when
memblks for different nodes interleave - for example,
find_node_by_addr() can return the wrong nodeid.

Kill numa_nodes[] and always use numa_meminfo instead.

* nodes_cover_memory() is renamed to numa_meminfo_cover_memory() and
  now operates on numa_meminfo and returns bool.

* setup_node_bootmem() needs the min/max range.  Compute the range on
  the fly.  setup_node_bootmem() invocation is restructured to use an
  outer loop instead of hardcoding the double invocations.

* find_node_by_addr() now operates on numa_meminfo.

* setup_physnodes() builds physnodes[] from memblks.  This will go
  away when emulation code is updated to use struct numa_meminfo.

This patch also makes the following misc changes.

* Clearing of nodes_add[] is converted to memset().

* numa_add_memblk() in amd_numa_init() is moved down a bit for
  consistency.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
This commit is contained in:
Tejun Heo 2011-02-16 17:11:09 +01:00
parent a844ef46fa
commit 91556237ec
4 changed files with 53 additions and 58 deletions

View file

@ -26,7 +26,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
extern nodemask_t cpu_nodes_parsed __initdata;
extern nodemask_t mem_nodes_parsed __initdata;
extern struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
extern int __cpuinit numa_cpu_node(int cpu);
extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);

View file

@ -165,12 +165,8 @@ int __init amd_numa_init(void)
pr_info("Node %d MemBase %016lx Limit %016lx\n",
nodeid, base, limit);
numa_nodes[nodeid].start = base;
numa_nodes[nodeid].end = limit;
numa_add_memblk(nodeid, base, limit);
prevbase = base;
numa_add_memblk(nodeid, base, limit);
node_set(nodeid, mem_nodes_parsed);
node_set(nodeid, cpu_nodes_parsed);
}

View file

@ -46,8 +46,6 @@ static unsigned long __initdata nodemap_size;
static struct numa_meminfo numa_meminfo __initdata;
struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
/*
* Given a shift value, try to populate memnodemap[]
* Returns :
@ -349,17 +347,17 @@ static int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
* Sanity check to catch more bad NUMA configurations (they are amazingly
* common). Make sure the nodes cover all memory.
*/
static int __init nodes_cover_memory(const struct bootnode *nodes)
static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
{
unsigned long numaram, e820ram;
int i;
numaram = 0;
for_each_node_mask(i, mem_nodes_parsed) {
unsigned long s = nodes[i].start >> PAGE_SHIFT;
unsigned long e = nodes[i].end >> PAGE_SHIFT;
for (i = 0; i < mi->nr_blks; i++) {
unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
numaram += e - s;
numaram -= __absent_pages_in_range(i, s, e);
numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
if ((long)numaram < 0)
numaram = 0;
}
@ -371,14 +369,14 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
(numaram << PAGE_SHIFT) >> 20,
(e820ram << PAGE_SHIFT) >> 20);
return 0;
return false;
}
return 1;
return true;
}
static int __init numa_register_memblks(struct numa_meminfo *mi)
{
int i;
int i, j, nid;
/* Account for nodes with cpus and no memory */
nodes_or(node_possible_map, mem_nodes_parsed, cpu_nodes_parsed);
@ -398,23 +396,34 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
/* for out of order entries */
sort_node_map();
if (!nodes_cover_memory(numa_nodes))
if (!numa_meminfo_cover_memory(mi))
return -EINVAL;
init_memory_mapping_high();
/* Finally register nodes. */
for_each_node_mask(i, node_possible_map)
setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);
/*
* Try again in case setup_node_bootmem missed one due to missing
* bootmem.
* Finally register nodes. Do it twice in case setup_node_bootmem
* missed one due to missing bootmem.
*/
for_each_node_mask(i, node_possible_map)
if (!node_online(i))
setup_node_bootmem(i, numa_nodes[i].start,
numa_nodes[i].end);
for (i = 0; i < 2; i++) {
for_each_node_mask(nid, node_possible_map) {
u64 start = (u64)max_pfn << PAGE_SHIFT;
u64 end = 0;
if (node_online(nid))
continue;
for (j = 0; j < mi->nr_blks; j++) {
if (nid != mi->blk[j].nid)
continue;
start = min(mi->blk[j].start, start);
end = max(mi->blk[j].end, end);
}
if (start < end)
setup_node_bootmem(nid, start, end);
}
}
return 0;
}
@ -432,33 +441,41 @@ void __init numa_emu_cmdline(char *str)
int __init find_node_by_addr(unsigned long addr)
{
int ret = NUMA_NO_NODE;
const struct numa_meminfo *mi = &numa_meminfo;
int i;
for_each_node_mask(i, mem_nodes_parsed) {
for (i = 0; i < mi->nr_blks; i++) {
/*
* Find the real node that this emulated node appears on. For
* the sake of simplicity, we only use a real node's starting
* address to determine which emulated node it appears on.
*/
if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) {
ret = i;
break;
}
if (addr >= mi->blk[i].start && addr < mi->blk[i].end)
return mi->blk[i].nid;
}
return ret;
return NUMA_NO_NODE;
}
static int __init setup_physnodes(unsigned long start, unsigned long end)
{
const struct numa_meminfo *mi = &numa_meminfo;
int ret = 0;
int i;
memset(physnodes, 0, sizeof(physnodes));
for_each_node_mask(i, mem_nodes_parsed) {
physnodes[i].start = numa_nodes[i].start;
physnodes[i].end = numa_nodes[i].end;
for (i = 0; i < mi->nr_blks; i++) {
int nid = mi->blk[i].nid;
if (physnodes[nid].start == physnodes[nid].end) {
physnodes[nid].start = mi->blk[i].start;
physnodes[nid].end = mi->blk[i].end;
} else {
physnodes[nid].start = min(physnodes[nid].start,
mi->blk[i].start);
physnodes[nid].end = max(physnodes[nid].end,
mi->blk[i].end);
}
}
/*
@ -809,8 +826,6 @@ static int dummy_numa_init(void)
node_set(0, cpu_nodes_parsed);
node_set(0, mem_nodes_parsed);
numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
numa_nodes[0].start = 0;
numa_nodes[0].end = (u64)max_pfn << PAGE_SHIFT;
return 0;
}
@ -841,7 +856,6 @@ void __init initmem_init(void)
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
memset(&numa_meminfo, 0, sizeof(numa_meminfo));
memset(numa_nodes, 0, sizeof(numa_nodes));
remove_all_active_ranges();
if (numa_init[i]() < 0)

View file

@ -37,13 +37,9 @@ static __init int setup_node(int pxm)
static __init void bad_srat(void)
{
int i;
printk(KERN_ERR "SRAT: SRAT not used.\n");
acpi_numa = -1;
for (i = 0; i < MAX_NUMNODES; i++) {
numa_nodes[i].start = numa_nodes[i].end = 0;
nodes_add[i].start = nodes_add[i].end = 0;
}
memset(nodes_add, 0, sizeof(nodes_add));
}
static __init inline int srat_disabled(void)
@ -210,7 +206,6 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
void __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
struct bootnode *nd;
unsigned long start, end;
int node, pxm;
@ -243,18 +238,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
start, end);
if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
nd = &numa_nodes[node];
if (!node_test_and_set(node, mem_nodes_parsed)) {
nd->start = start;
nd->end = end;
} else {
if (start < nd->start)
nd->start = start;
if (nd->end < end)
nd->end = end;
}
} else
if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE))
node_set(node, mem_nodes_parsed);
else
update_nodes_add(node, start, end);
}