Merge branch 'x86/bootmem' into x86/mm

Merge reason: the topic is ready; consolidate it into the more generic x86/mm tree
              to prevent conflicts.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Ingo Molnar 2011-02-16 09:42:50 +01:00
commit 02ac81a812
9 changed files with 178 additions and 109 deletions

View file

@@ -2,6 +2,7 @@
#define _ASM_X86_PAGE_DEFS_H
#include <linux/const.h>
#include <linux/types.h>
/* PAGE_SHIFT determines the page size */
#define PAGE_SHIFT 12
@@ -45,9 +46,16 @@ extern int devmem_is_allowed(unsigned long pagenr);
extern unsigned long max_low_pfn_mapped;
extern unsigned long max_pfn_mapped;
static inline phys_addr_t get_max_mapped(void)
{
return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
}
extern unsigned long init_memory_mapping(unsigned long start,
unsigned long end);
void init_memory_mapping_high(void);
extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8);
extern void free_initmem(void);

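A quick userspace model of the get_max_mapped() helper added above. The point of the phys_addr_t cast is to widen the pfn before the shift, so the pfn-to-physical conversion cannot truncate on 32-bit/PAE builds; the pfn value below is made up.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
    /* a pfn whose physical address lies above 4G (hypothetical) */
    uint32_t max_pfn_mapped = 0x120000;

    /* widened before the shift, as the phys_addr_t cast does */
    uint64_t ok  = (uint64_t)max_pfn_mapped << PAGE_SHIFT;
    /* shifting the 32-bit value directly wraps around */
    uint32_t bad = max_pfn_mapped << PAGE_SHIFT;

    printf("widened: %#llx  truncated: %#x\n",
           (unsigned long long)ok, bad);
    return 0;
}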
View file

@@ -13,7 +13,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/pci_ids.h>
#include <linux/pci.h>
@@ -57,7 +57,7 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
static u32 __init allocate_aperture(void)
{
u32 aper_size;
void *p;
unsigned long addr;
/* aper_size should <= 1G */
if (fallback_aper_order > 5)
@@ -83,27 +83,26 @@ static u32 __init allocate_aperture(void)
* so don't use 512M below as gart iommu, leave the space for kernel
* code for safe
*/
p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20);
if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) {
printk(KERN_ERR
"Cannot allocate aperture memory hole (%lx,%uK)\n",
addr, aper_size>>10);
return 0;
}
memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
/*
* Kmemleak should not scan this block as it may not be mapped via the
* kernel direct mapping.
*/
kmemleak_ignore(p);
if (!p || __pa(p)+aper_size > 0xffffffff) {
printk(KERN_ERR
"Cannot allocate aperture memory hole (%p,%uK)\n",
p, aper_size>>10);
if (p)
free_bootmem(__pa(p), aper_size);
return 0;
}
kmemleak_ignore(phys_to_virt(addr));
printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
aper_size >> 10, __pa(p));
insert_aperture_resource((u32)__pa(p), aper_size);
register_nosave_region((u32)__pa(p) >> PAGE_SHIFT,
(u32)__pa(p+aper_size) >> PAGE_SHIFT);
aper_size >> 10, addr);
insert_aperture_resource((u32)addr, aper_size);
register_nosave_region(addr >> PAGE_SHIFT,
(addr+aper_size) >> PAGE_SHIFT);
return (u32)__pa(p);
return (u32)addr;
}
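A minimal standalone sketch of the allocation pattern the hunk switches to: pick an aper_size-aligned block from free memory between 512M and 4G, then re-check that the block really ends below 4G before reserving it, as the new code does. The range list and find_in_range() helper are hypothetical stand-ins, not the memblock API, and real memblock applies its own search order; only the floor/ceiling/alignment arithmetic is modeled.

#include <stdint.h>
#include <stdio.h>

#define RANGE_ERROR ((uint64_t)~0ULL)    /* stand-in for MEMBLOCK_ERROR */

struct range { uint64_t start, end; };   /* free [start, end) */

static uint64_t find_in_range(const struct range *avail, int n,
                              uint64_t floor, uint64_t ceil,
                              uint64_t size, uint64_t align)
{
    for (int i = 0; i < n; i++) {
        uint64_t base  = avail[i].start > floor ? avail[i].start : floor;
        uint64_t limit = avail[i].end < ceil ? avail[i].end : ceil;

        base = (base + align - 1) & ~(align - 1);   /* align up */
        if (base < limit && limit - base >= size)
            return base;
    }
    return RANGE_ERROR;
}

int main(void)
{
    struct range avail[] = { { 1ULL << 20, 3ULL << 30 } }; /* invented */
    uint64_t aper_size = 64ULL << 20;                      /* 64M hole */
    uint64_t addr = find_in_range(avail, 1, 512ULL << 20, 1ULL << 32,
                                  aper_size, aper_size);

    if (addr == RANGE_ERROR || addr + aper_size > 0xffffffff)
        printf("cannot allocate aperture memory hole\n");
    else
        printf("aperture at %#llx\n", (unsigned long long)addr);
    return 0;
}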

View file

@@ -293,10 +293,32 @@ static void __init init_gbpages(void)
else
direct_gbpages = 0;
}
static void __init cleanup_highmap_brk_end(void)
{
pud_t *pud;
pmd_t *pmd;
mmu_cr4_features = read_cr4();
/*
* _brk_end cannot change anymore, but it and _end may be
* located on different 2M pages. cleanup_highmap(), however,
* can only consider _end when it runs, so destroy any
* mappings beyond _brk_end here.
*/
pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
pmd = pmd_offset(pud, _brk_end - 1);
while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
pmd_clear(pmd);
}
#else
static inline void init_gbpages(void)
{
}
static inline void cleanup_highmap_brk_end(void)
{
}
#endif
static void __init reserve_brk(void)
@@ -307,6 +329,8 @@ static void __init reserve_brk(void)
/* Mark brk area as locked down and no longer taking any
new allocations */
_brk_start = 0;
cleanup_highmap_brk_end();
}
#ifdef CONFIG_BLK_DEV_INITRD
@@ -680,15 +704,6 @@ static int __init parse_reservelow(char *p)
early_param("reservelow", parse_reservelow);
static u64 __init get_max_mapped(void)
{
u64 end = max_pfn_mapped;
end <<= PAGE_SHIFT;
return end;
}
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -950,14 +965,6 @@ void __init setup_arch(char **cmdline_p)
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
max_pfn_mapped = max_low_pfn_mapped;
#ifdef CONFIG_X86_64
if (max_pfn > max_low_pfn) {
max_pfn_mapped = init_memory_mapping(1UL<<32,
max_pfn<<PAGE_SHIFT);
/* can we preseve max_low_pfn ?*/
max_low_pfn = max_pfn;
}
#endif
memblock.current_limit = get_max_mapped();
/*

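The cleanup_highmap_brk_end() added above clears every 2M (PMD) mapping past the one holding _brk_end, up to the one holding _end. A small model of that index arithmetic, using invented values for the linker symbols (LP64 host assumed):

#include <stdio.h>

#define PMD_SHIFT 21    /* 2M pages on x86-64 */

int main(void)
{
    /* hypothetical stand-ins for _brk_end and _end */
    unsigned long brk_end = 0xffffffff81e2d000UL;
    unsigned long end     = 0xffffffff8245a000UL;

    /* index of the 2M page containing each address's last byte */
    unsigned long first = (brk_end - 1) >> PMD_SHIFT;
    unsigned long last  = (end - 1) >> PMD_SHIFT;

    /* the while (++pmd <= ...) loop clears exactly this many PMDs */
    printf("PMD entries cleared: %lu\n", last - first);
    return 0;
}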
View file

@@ -278,12 +278,14 @@ int __init amd_scan_nodes(void)
apicid_base = boot_cpu_physical_apicid;
}
for_each_node_mask(i, node_possible_map) {
int j;
for_each_node_mask(i, node_possible_map)
memblock_x86_register_active_regions(i,
nodes[i].start >> PAGE_SHIFT,
nodes[i].end >> PAGE_SHIFT);
init_memory_mapping_high();
for_each_node_mask(i, node_possible_map) {
int j;
for (j = apicid_base; j < cores + apicid_base; j++)
apicid_to_node[(i << bits) + j] = i;
setup_node_bootmem(i, nodes[i].start, nodes[i].end);

View file

@@ -33,7 +33,7 @@ int direct_gbpages
static void __init find_early_table_space(unsigned long end, int use_pse,
int use_gbpages)
{
unsigned long puds, pmds, ptes, tables, start;
unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
phys_addr_t base;
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
@@ -65,20 +65,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
#ifdef CONFIG_X86_32
/* for fixmap */
tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
good_end = max_pfn_mapped << PAGE_SHIFT;
#endif
/*
* RED-PEN putting page tables only on node 0 could
* cause a hotspot and fill up ZONE_DMA. The page tables
* need roughly 0.5KB per GB.
*/
#ifdef CONFIG_X86_32
start = 0x7000;
#else
start = 0x8000;
#endif
base = memblock_find_in_range(start, max_pfn_mapped<<PAGE_SHIFT,
tables, PAGE_SIZE);
base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
if (base == MEMBLOCK_ERROR)
panic("Cannot find space for the kernel page tables");
@@ -279,25 +270,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
load_cr3(swapper_pg_dir);
#endif
#ifdef CONFIG_X86_64
if (!after_bootmem && !start) {
pud_t *pud;
pmd_t *pmd;
mmu_cr4_features = read_cr4();
/*
* _brk_end cannot change anymore, but it and _end may be
* located on different 2M pages. cleanup_highmap(), however,
* can only consider _end when it runs, so destroy any
* mappings beyond _brk_end here.
*/
pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
pmd = pmd_offset(pud, _brk_end - 1);
while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
pmd_clear(pmd);
}
#endif
__flush_tlb_all();
if (!after_bootmem && e820_table_end > e820_table_start)

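find_early_table_space() above sizes the kernel page tables before asking memblock for room. A rough standalone model of that estimate, assuming 2M (PSE) mappings and no gbpages so no PTE pages are required; constants are the x86-64 values:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL
#define PUD_SHIFT 30
#define PMD_SHIFT 21
#define PUD_SIZE  (1ULL << PUD_SHIFT)
#define PMD_SIZE  (1ULL << PMD_SHIFT)

static uint64_t roundup_page(uint64_t bytes)
{
    return (bytes + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
}

int main(void)
{
    uint64_t end  = 16ULL << 30;                   /* map 16G */
    uint64_t puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
    uint64_t pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;

    /* 8-byte entries, each table padded to a whole 4K page */
    uint64_t tables = roundup_page(puds * 8) + roundup_page(pmds * 8);

    printf("page tables need %llu KB\n",
           (unsigned long long)(tables >> 10));
    return 0;
}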
View file

@@ -333,12 +333,28 @@ static __ref void *alloc_low_page(unsigned long *phys)
return adr;
}
static __ref void *map_low_page(void *virt)
{
void *adr;
unsigned long phys, left;
if (after_bootmem)
return virt;
phys = __pa(virt);
left = phys & (PAGE_SIZE - 1);
adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
adr = (void *)(((unsigned long)adr) | left);
return adr;
}
static __ref void unmap_low_page(void *adr)
{
if (after_bootmem)
return;
early_iounmap(adr, PAGE_SIZE);
early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
}
static unsigned long __meminit
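map_low_page() above keeps the caller's sub-page offset across an early_memremap() of the containing page, and the reworked unmap_low_page() masks the offset back off. The split-and-recombine arithmetic, modeled standalone with invented addresses:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
    unsigned long phys = 0x1e2d6f8UL;             /* arbitrary */
    unsigned long left = phys & (PAGE_SIZE - 1);  /* offset 0x6f8 */

    /* pretend the page-aligned remap came back at this address */
    unsigned long adr = 0xffffc90000042000UL;

    /* what map_low_page() hands back; unmap_low_page() applies
     * PAGE_MASK to recover the mapping address before unmapping */
    printf("mapped page %#lx, usable pointer %#lx\n",
           phys & PAGE_MASK, adr | left);
    return 0;
}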
@@ -385,15 +401,6 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
return last_map_addr;
}
static unsigned long __meminit
phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
pgprot_t prot)
{
pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
return phys_pte_init(pte, address, end, prot);
}
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
unsigned long page_size_mask, pgprot_t prot)
@@ -420,8 +427,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
if (pmd_val(*pmd)) {
if (!pmd_large(*pmd)) {
spin_lock(&init_mm.page_table_lock);
last_map_addr = phys_pte_update(pmd, address,
pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
last_map_addr = phys_pte_init(pte, address,
end, prot);
unmap_low_page(pte);
spin_unlock(&init_mm.page_table_lock);
continue;
}
@@ -467,18 +476,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
return last_map_addr;
}
static unsigned long __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
unsigned long page_size_mask, pgprot_t prot)
{
pmd_t *pmd = pmd_offset(pud, 0);
unsigned long last_map_addr;
last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
__flush_tlb_all();
return last_map_addr;
}
static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
unsigned long page_size_mask)
@@ -504,8 +501,11 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
if (pud_val(*pud)) {
if (!pud_large(*pud)) {
last_map_addr = phys_pmd_update(pud, addr, end,
pmd = map_low_page(pmd_offset(pud, 0));
last_map_addr = phys_pmd_init(pmd, addr, end,
page_size_mask, prot);
unmap_low_page(pmd);
__flush_tlb_all();
continue;
}
/*
@@ -553,17 +553,6 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
return last_map_addr;
}
static unsigned long __meminit
phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
unsigned long page_size_mask)
{
pud_t *pud;
pud = (pud_t *)pgd_page_vaddr(*pgd);
return phys_pud_init(pud, addr, end, page_size_mask);
}
unsigned long __meminit
kernel_physical_mapping_init(unsigned long start,
unsigned long end,
@@ -587,8 +576,10 @@ kernel_physical_mapping_init(unsigned long start,
next = end;
if (pgd_val(*pgd)) {
last_map_addr = phys_pud_update(pgd, __pa(start),
pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
last_map_addr = phys_pud_init(pud, __pa(start),
__pa(end), page_size_mask);
unmap_low_page(pud);
continue;
}
@@ -616,9 +607,63 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8)
{
memblock_x86_register_active_regions(0, start_pfn, end_pfn);
init_memory_mapping_high();
}
#endif
struct mapping_work_data {
unsigned long start;
unsigned long end;
unsigned long pfn_mapped;
};
static int __init_refok
mapping_work_fn(unsigned long start_pfn, unsigned long end_pfn, void *datax)
{
struct mapping_work_data *data = datax;
unsigned long pfn_mapped;
unsigned long final_start, final_end;
final_start = max_t(unsigned long, start_pfn<<PAGE_SHIFT, data->start);
final_end = min_t(unsigned long, end_pfn<<PAGE_SHIFT, data->end);
if (final_end <= final_start)
return 0;
pfn_mapped = init_memory_mapping(final_start, final_end);
if (pfn_mapped > data->pfn_mapped)
data->pfn_mapped = pfn_mapped;
return 0;
}
static unsigned long __init_refok
init_memory_mapping_active_regions(unsigned long start, unsigned long end)
{
struct mapping_work_data data;
data.start = start;
data.end = end;
data.pfn_mapped = 0;
work_with_active_regions(MAX_NUMNODES, mapping_work_fn, &data);
return data.pfn_mapped;
}
void __init_refok init_memory_mapping_high(void)
{
if (max_pfn > max_low_pfn) {
max_pfn_mapped = init_memory_mapping_active_regions(1UL<<32,
max_pfn<<PAGE_SHIFT);
/* can we preserve max_low_pfn ? */
max_low_pfn = max_pfn;
memblock.current_limit = get_max_mapped();
}
}
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];

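mapping_work_fn() above intersects each active PFN range with the requested window, so init_memory_mapping_high() maps only RAM that actually exists above 4G instead of one blanket [4G, max_pfn) span. A standalone model of the clamping, with invented PFNs (LP64 host assumed):

#include <stdio.h>

#define PAGE_SHIFT 12

static void clamp_and_map(unsigned long start_pfn, unsigned long end_pfn,
                          unsigned long win_start, unsigned long win_end)
{
    unsigned long final_start = start_pfn << PAGE_SHIFT;
    unsigned long final_end   = end_pfn << PAGE_SHIFT;

    if (final_start < win_start)
        final_start = win_start;
    if (final_end > win_end)
        final_end = win_end;
    if (final_end <= final_start) {
        printf("skip: region outside the window\n");
        return;
    }
    printf("map [%#lx, %#lx)\n", final_start, final_end);
}

int main(void)
{
    unsigned long four_g = 1UL << 32;
    unsigned long top    = 0x200000UL << PAGE_SHIFT;  /* 8G */

    clamp_and_map(0x100, 0x8000, four_g, top);        /* below 4G: skipped */
    clamp_and_map(0x100000, 0x180000, four_g, top);   /* above 4G: mapped  */
    return 0;
}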
View file

@@ -86,7 +86,7 @@ static int __init allocate_cachealigned_memnodemap(void)
addr = 0x8000;
nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
nodemap_addr = memblock_find_in_range(addr, max_pfn<<PAGE_SHIFT,
nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
nodemap_size, L1_CACHE_BYTES);
if (nodemap_addr == MEMBLOCK_ERROR) {
printk(KERN_ERR
@@ -598,11 +598,12 @@ static int __init numa_emulation(unsigned long start_pfn,
* the e820 memory map.
*/
remove_all_active_ranges();
for_each_node_mask(i, node_possible_map) {
for_each_node_mask(i, node_possible_map)
memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
nodes[i].end >> PAGE_SHIFT);
init_memory_mapping_high();
for_each_node_mask(i, node_possible_map)
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
setup_physnodes(addr, max_addr, acpi, amd);
fake_physnodes(acpi, amd, num_nodes);
numa_init_array();
@@ -658,6 +659,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
for (i = 0; i < nr_cpu_ids; i++)
numa_set_node(i, 0);
memblock_x86_register_active_regions(0, start_pfn, last_pfn);
init_memory_mapping_high();
setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
}

View file

@@ -444,6 +444,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
return -1;
}
init_memory_mapping_high();
/* Account for nodes with cpus and no memory */
nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);

View file

@@ -3616,6 +3616,34 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
return -1;
}
/*
* Basic iterator support. Return the last range of PFNs for a node
* Note: nid == MAX_NUMNODES returns last region regardless of node
*/
static int __meminit last_active_region_index_in_nid(int nid)
{
int i;
for (i = nr_nodemap_entries - 1; i >= 0; i--)
if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
return i;
return -1;
}
/*
* Basic iterator support. Return the previous active range of PFNs for a node
* Note: nid == MAX_NUMNODES returns next region regardless of node
*/
static int __meminit previous_active_region_index_in_nid(int index, int nid)
{
for (index = index - 1; index >= 0; index--)
if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
return index;
return -1;
}
#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
/*
* Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
@@ -3667,6 +3695,10 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
for (i = first_active_region_index_in_nid(nid); i != -1; \
i = next_active_region_index_in_nid(i, nid))
#define for_each_active_range_index_in_nid_reverse(i, nid) \
for (i = last_active_region_index_in_nid(nid); i != -1; \
i = previous_active_region_index_in_nid(i, nid))
/**
* free_bootmem_with_active_regions - Call free_bootmem_node for each active range
* @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
@@ -3705,7 +3737,7 @@ u64 __init find_memory_core_early(int nid, u64 size, u64 align,
int i;
/* Need to go over early_node_map to find out good range for node */
for_each_active_range_index_in_nid(i, nid) {
for_each_active_range_index_in_nid_reverse(i, nid) {
u64 addr;
u64 ei_start, ei_last;
u64 final_start, final_end;
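The reverse iterator added above lets find_memory_core_early() walk early_node_map from the last entry down, so early allocations come from high addresses and line up with memblock's top-down placement. A toy top-down search in the same spirit; the ranges, size, and alignment below are invented:

#include <stdio.h>

struct region { unsigned long start, end; };    /* free [start, end) */

static unsigned long find_top_down(const struct region *map, int n,
                                   unsigned long size, unsigned long align)
{
    /* reverse walk: the highest range gets the first chance */
    for (int i = n - 1; i >= 0; i--) {
        if (map[i].end - map[i].start < size)
            continue;
        unsigned long base = (map[i].end - size) & ~(align - 1);
        if (base >= map[i].start)
            return base;
    }
    return (unsigned long)-1;
}

int main(void)
{
    struct region map[] = {
        { 0x1000000UL,   0x8000000UL },     /* low range  */
        { 0x100000000UL, 0x140000000UL },   /* high range */
    };

    /* 1M block, 1M aligned: carved from the top of the high range */
    printf("%#lx\n", find_top_down(map, 2, 1UL << 20, 1UL << 20));
    return 0;
}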