diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 51548947ad3b..30bf8043984d 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -484,6 +484,7 @@ out: nodes[i].end >> PAGE_SHIFT); setup_node_bootmem(i, nodes[i].start, nodes[i].end); } + acpi_fake_nodes(nodes, num_nodes); numa_init_array(); return 0; } diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 0e0725db20b7..7ac8ff333e84 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c @@ -350,7 +350,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) /* Sanity check to catch more bad SRATs (they are amazingly common). Make sure the PXMs cover all memory. */ -static int nodes_cover_memory(void) +static int __init nodes_cover_memory(const struct bootnode *nodes) { int i; unsigned long pxmram, e820ram; @@ -406,7 +406,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) } } - if (!nodes_cover_memory()) { + if (!nodes_cover_memory(nodes)) { bad_srat(); return -1; } @@ -440,6 +440,75 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) return 0; } +#ifdef CONFIG_NUMA_EMU +static int __init find_node_by_addr(unsigned long addr) +{ + int ret = NUMA_NO_NODE; + int i; + + for_each_node_mask(i, nodes_parsed) { + /* + * Find the real node that this emulated node appears on. For + * the sake of simplicity, we only use a real node's starting + * address to determine which emulated node it appears on. + */ + if (addr >= nodes[i].start && addr < nodes[i].end) { + ret = i; + break; + } + } + return i; +} + +/* + * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID + * mappings that respect the real ACPI topology but reflect our emulated + * environment. For each emulated node, we find which real node it appears on + * and create PXM to NID mappings for those fake nodes which mirror that + * locality. SLIT will now represent the correct distances between emulated + * nodes as a result of the real topology. + */ +void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) +{ + int i; + int fake_node_to_pxm_map[MAX_NUMNODES] = { + [0 ... MAX_NUMNODES-1] = PXM_INVAL + }; + + printk(KERN_INFO "Faking PXM affinity for fake nodes on real " + "topology.\n"); + for (i = 0; i < num_nodes; i++) { + int nid, pxm; + + nid = find_node_by_addr(fake_nodes[i].start); + if (nid == NUMA_NO_NODE) + continue; + pxm = node_to_pxm(nid); + if (pxm == PXM_INVAL) + continue; + fake_node_to_pxm_map[i] = pxm; + } + for (i = 0; i < num_nodes; i++) + __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i); + + nodes_clear(nodes_parsed); + for (i = 0; i < num_nodes; i++) + if (fake_nodes[i].start != fake_nodes[i].end) + node_set(i, nodes_parsed); + WARN_ON(!nodes_cover_memory(fake_nodes)); +} + +static int null_slit_node_compare(int a, int b) +{ + return node_to_pxm(a) == node_to_pxm(b); +} +#else +static int null_slit_node_compare(int a, int b) +{ + return a == b; +} +#endif /* CONFIG_NUMA_EMU */ + void __init srat_reserve_add_area(int nodeid) { if (found_add_area && nodes_add[nodeid].end) { @@ -464,7 +533,8 @@ int __node_distance(int a, int b) int index; if (!acpi_slit) - return a == b ? LOCAL_DISTANCE : REMOTE_DISTANCE; + return null_slit_node_compare(a, b) ? LOCAL_DISTANCE : + REMOTE_DISTANCE; index = acpi_slit->locality_count * node_to_pxm(a); return acpi_slit->entry[index + node_to_pxm(b)]; } diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 6c44b522f4d3..ab04d848b19d 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -36,8 +36,6 @@ ACPI_MODULE_NAME("numa"); static nodemask_t nodes_found_map = NODE_MASK_NONE; -#define PXM_INVAL -1 -#define NID_INVAL -1 /* maps to convert between proximity domain and logical node ID */ static int __cpuinitdata pxm_to_node_map[MAX_PXM_DOMAINS] @@ -59,6 +57,12 @@ int node_to_pxm(int node) return node_to_pxm_map[node]; } +void __acpi_map_pxm_to_node(int pxm, int node) +{ + pxm_to_node_map[pxm] = node; + node_to_pxm_map[node] = pxm; +} + int acpi_map_pxm_to_node(int pxm) { int node = pxm_to_node_map[pxm]; @@ -67,8 +71,7 @@ int acpi_map_pxm_to_node(int pxm) if (nodes_weight(nodes_found_map) >= MAX_NUMNODES) return NID_INVAL; node = first_unset_node(nodes_found_map); - pxm_to_node_map[pxm] = node; - node_to_pxm_map[node] = pxm; + __acpi_map_pxm_to_node(pxm, node); node_set(node, nodes_found_map); } diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h index e2fcee2b340d..62c5ee4311da 100644 --- a/include/acpi/acpi_numa.h +++ b/include/acpi/acpi_numa.h @@ -13,6 +13,7 @@ extern int pxm_to_node(int); extern int node_to_pxm(int); +extern void __acpi_map_pxm_to_node(int, int); extern int acpi_map_pxm_to_node(int); extern void __cpuinit acpi_unmap_pxm_to_node(int); diff --git a/include/asm-x86_64/acpi.h b/include/asm-x86_64/acpi.h index a29f05087a31..1da8f49c0fe2 100644 --- a/include/asm-x86_64/acpi.h +++ b/include/asm-x86_64/acpi.h @@ -29,6 +29,7 @@ #ifdef __KERNEL__ #include +#include #define COMPILER_DEPENDENT_INT64 long long #define COMPILER_DEPENDENT_UINT64 unsigned long long @@ -141,6 +142,16 @@ extern int acpi_pci_disabled; extern int acpi_skip_timer_override; extern int acpi_use_timer_override; +#ifdef CONFIG_ACPI_NUMA +extern void __init acpi_fake_nodes(const struct bootnode *fake_nodes, + int num_nodes); +#else +static inline void acpi_fake_nodes(const struct bootnode *fake_nodes, + int num_nodes) +{ +} +#endif + #endif /*__KERNEL__*/ #endif /*_ASM_ACPI_H*/ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e88b62e6b3aa..d5680cd7746a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -231,6 +231,9 @@ extern int acpi_paddr_to_node(u64 start_addr, u64 size); extern int pnpacpi_disabled; +#define PXM_INVAL (-1) +#define NID_INVAL (-1) + #else /* CONFIG_ACPI */ static inline int acpi_boot_init(void)