remarkable-linux/mm/vmalloc.c
Deepak Saxena fd195c49fb [PATCH] arm: allow for arch-specific IOREMAP_MAX_ORDER
Version 6 of the ARM architecture introduces the concept of 16MB pages
(supersections) and 36-bit (40-bit actually, but nobody uses this) physical
addresses.  36-bit addressed memory and I/O and ARMv6 can only be mapped
using supersections and the requirement on these is that both virtual and
physical addresses be 16MB aligned.  In trying to add support for ioremap()
of 36-bit I/O, we run into the issue that get_vm_area() allows for a
maximum of 512K alignment via the IOREMAP_MAX_ORDER constant.  To work
around this, we can:

- Allocate a larger VM area than needed (size + (1ul << IOREMAP_MAX_ORDER))
  and then align the pointer ourselves, but this ends up with 512K of
  wasted VM per ioremap().

- Provide a new __get_vm_area_aligned() API and make __get_vm_area() sit
  on top of this. I did this and it works but I don't like the idea
  adding another VM API just for this one case.

- My preferred solution which is to allow the architecture to override
  the IOREMAP_MAX_ORDER constant with it's own version.

Signed-off-by: Deepak Saxena <dsaxena@plexity.net>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-05 00:05:46 -07:00

594 lines
13 KiB
C

/*
* linux/mm/vmalloc.c
*
* Copyright (C) 1993 Linus Torvalds
* Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
* SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
* Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/vmalloc.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
{
pte_t *pte;
pte = pte_offset_kernel(pmd, addr);
do {
pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
WARN_ON(!pte_none(ptent) && !pte_present(ptent));
} while (pte++, addr += PAGE_SIZE, addr != end);
}
static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
unsigned long end)
{
pmd_t *pmd;
unsigned long next;
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
if (pmd_none_or_clear_bad(pmd))
continue;
vunmap_pte_range(pmd, addr, next);
} while (pmd++, addr = next, addr != end);
}
static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
unsigned long end)
{
pud_t *pud;
unsigned long next;
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
continue;
vunmap_pmd_range(pud, addr, next);
} while (pud++, addr = next, addr != end);
}
void unmap_vm_area(struct vm_struct *area)
{
pgd_t *pgd;
unsigned long next;
unsigned long addr = (unsigned long) area->addr;
unsigned long end = addr + area->size;
BUG_ON(addr >= end);
pgd = pgd_offset_k(addr);
flush_cache_vunmap(addr, end);
do {
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
vunmap_pud_range(pgd, addr, next);
} while (pgd++, addr = next, addr != end);
flush_tlb_kernel_range((unsigned long) area->addr, end);
}
static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, pgprot_t prot, struct page ***pages)
{
pte_t *pte;
pte = pte_alloc_kernel(&init_mm, pmd, addr);
if (!pte)
return -ENOMEM;
do {
struct page *page = **pages;
WARN_ON(!pte_none(*pte));
if (!page)
return -ENOMEM;
set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
(*pages)++;
} while (pte++, addr += PAGE_SIZE, addr != end);
return 0;
}
static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
unsigned long end, pgprot_t prot, struct page ***pages)
{
pmd_t *pmd;
unsigned long next;
pmd = pmd_alloc(&init_mm, pud, addr);
if (!pmd)
return -ENOMEM;
do {
next = pmd_addr_end(addr, end);
if (vmap_pte_range(pmd, addr, next, prot, pages))
return -ENOMEM;
} while (pmd++, addr = next, addr != end);
return 0;
}
static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
unsigned long end, pgprot_t prot, struct page ***pages)
{
pud_t *pud;
unsigned long next;
pud = pud_alloc(&init_mm, pgd, addr);
if (!pud)
return -ENOMEM;
do {
next = pud_addr_end(addr, end);
if (vmap_pmd_range(pud, addr, next, prot, pages))
return -ENOMEM;
} while (pud++, addr = next, addr != end);
return 0;
}
int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
{
pgd_t *pgd;
unsigned long next;
unsigned long addr = (unsigned long) area->addr;
unsigned long end = addr + area->size - PAGE_SIZE;
int err;
BUG_ON(addr >= end);
pgd = pgd_offset_k(addr);
spin_lock(&init_mm.page_table_lock);
do {
next = pgd_addr_end(addr, end);
err = vmap_pud_range(pgd, addr, next, prot, pages);
if (err)
break;
} while (pgd++, addr = next, addr != end);
spin_unlock(&init_mm.page_table_lock);
flush_cache_vmap((unsigned long) area->addr, end);
return err;
}
struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
unsigned long start, unsigned long end)
{
struct vm_struct **p, *tmp, *area;
unsigned long align = 1;
unsigned long addr;
if (flags & VM_IOREMAP) {
int bit = fls(size);
if (bit > IOREMAP_MAX_ORDER)
bit = IOREMAP_MAX_ORDER;
else if (bit < PAGE_SHIFT)
bit = PAGE_SHIFT;
align = 1ul << bit;
}
addr = ALIGN(start, align);
size = PAGE_ALIGN(size);
area = kmalloc(sizeof(*area), GFP_KERNEL);
if (unlikely(!area))
return NULL;
if (unlikely(!size)) {
kfree (area);
return NULL;
}
/*
* We always allocate a guard page.
*/
size += PAGE_SIZE;
write_lock(&vmlist_lock);
for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
if ((unsigned long)tmp->addr < addr) {
if((unsigned long)tmp->addr + tmp->size >= addr)
addr = ALIGN(tmp->size +
(unsigned long)tmp->addr, align);
continue;
}
if ((size + addr) < addr)
goto out;
if (size + addr <= (unsigned long)tmp->addr)
goto found;
addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
if (addr > end - size)
goto out;
}
found:
area->next = *p;
*p = area;
area->flags = flags;
area->addr = (void *)addr;
area->size = size;
area->pages = NULL;
area->nr_pages = 0;
area->phys_addr = 0;
write_unlock(&vmlist_lock);
return area;
out:
write_unlock(&vmlist_lock);
kfree(area);
if (printk_ratelimit())
printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
return NULL;
}
/**
* get_vm_area - reserve a contingous kernel virtual area
*
* @size: size of the area
* @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC
*
* Search an area of @size in the kernel virtual mapping area,
* and reserved it for out purposes. Returns the area descriptor
* on success or %NULL on failure.
*/
struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
{
return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END);
}
/* Caller must hold vmlist_lock */
struct vm_struct *__remove_vm_area(void *addr)
{
struct vm_struct **p, *tmp;
for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
if (tmp->addr == addr)
goto found;
}
return NULL;
found:
unmap_vm_area(tmp);
*p = tmp->next;
/*
* Remove the guard page.
*/
tmp->size -= PAGE_SIZE;
return tmp;
}
/**
* remove_vm_area - find and remove a contingous kernel virtual area
*
* @addr: base address
*
* Search for the kernel VM area starting at @addr, and remove it.
* This function returns the found VM area, but using it is NOT safe
* on SMP machines, except for its size or flags.
*/
struct vm_struct *remove_vm_area(void *addr)
{
struct vm_struct *v;
write_lock(&vmlist_lock);
v = __remove_vm_area(addr);
write_unlock(&vmlist_lock);
return v;
}
void __vunmap(void *addr, int deallocate_pages)
{
struct vm_struct *area;
if (!addr)
return;
if ((PAGE_SIZE-1) & (unsigned long)addr) {
printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
WARN_ON(1);
return;
}
area = remove_vm_area(addr);
if (unlikely(!area)) {
printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
addr);
WARN_ON(1);
return;
}
if (deallocate_pages) {
int i;
for (i = 0; i < area->nr_pages; i++) {
if (unlikely(!area->pages[i]))
BUG();
__free_page(area->pages[i]);
}
if (area->nr_pages > PAGE_SIZE/sizeof(struct page *))
vfree(area->pages);
else
kfree(area->pages);
}
kfree(area);
return;
}
/**
* vfree - release memory allocated by vmalloc()
*
* @addr: memory base address
*
* Free the virtually contiguous memory area starting at @addr, as
* obtained from vmalloc(), vmalloc_32() or __vmalloc().
*
* May not be called in interrupt context.
*/
void vfree(void *addr)
{
BUG_ON(in_interrupt());
__vunmap(addr, 1);
}
EXPORT_SYMBOL(vfree);
/**
* vunmap - release virtual mapping obtained by vmap()
*
* @addr: memory base address
*
* Free the virtually contiguous memory area starting at @addr,
* which was created from the page array passed to vmap().
*
* May not be called in interrupt context.
*/
void vunmap(void *addr)
{
BUG_ON(in_interrupt());
__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);
/**
* vmap - map an array of pages into virtually contiguous space
*
* @pages: array of page pointers
* @count: number of pages to map
* @flags: vm_area->flags
* @prot: page protection for the mapping
*
* Maps @count pages from @pages into contiguous kernel virtual
* space.
*/
void *vmap(struct page **pages, unsigned int count,
unsigned long flags, pgprot_t prot)
{
struct vm_struct *area;
if (count > num_physpages)
return NULL;
area = get_vm_area((count << PAGE_SHIFT), flags);
if (!area)
return NULL;
if (map_vm_area(area, prot, &pages)) {
vunmap(area->addr);
return NULL;
}
return area->addr;
}
EXPORT_SYMBOL(vmap);
void *__vmalloc_area(struct vm_struct *area, unsigned int __nocast gfp_mask, pgprot_t prot)
{
struct page **pages;
unsigned int nr_pages, array_size, i;
nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
area->nr_pages = nr_pages;
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE)
pages = __vmalloc(array_size, gfp_mask, PAGE_KERNEL);
else
pages = kmalloc(array_size, (gfp_mask & ~__GFP_HIGHMEM));
area->pages = pages;
if (!area->pages) {
remove_vm_area(area->addr);
kfree(area);
return NULL;
}
memset(area->pages, 0, array_size);
for (i = 0; i < area->nr_pages; i++) {
area->pages[i] = alloc_page(gfp_mask);
if (unlikely(!area->pages[i])) {
/* Successfully allocated i pages, free them in __vunmap() */
area->nr_pages = i;
goto fail;
}
}
if (map_vm_area(area, prot, &pages))
goto fail;
return area->addr;
fail:
vfree(area->addr);
return NULL;
}
/**
* __vmalloc - allocate virtually contiguous memory
*
* @size: allocation size
* @gfp_mask: flags for the page level allocator
* @prot: protection mask for the allocated pages
*
* Allocate enough pages to cover @size from the page level
* allocator with @gfp_mask flags. Map them into contiguous
* kernel virtual space, using a pagetable protection of @prot.
*/
void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask, pgprot_t prot)
{
struct vm_struct *area;
size = PAGE_ALIGN(size);
if (!size || (size >> PAGE_SHIFT) > num_physpages)
return NULL;
area = get_vm_area(size, VM_ALLOC);
if (!area)
return NULL;
return __vmalloc_area(area, gfp_mask, prot);
}
EXPORT_SYMBOL(__vmalloc);
/**
* vmalloc - allocate virtually contiguous memory
*
* @size: allocation size
*
* Allocate enough pages to cover @size from the page level
* allocator and map them into contiguous kernel virtual space.
*
* For tight cotrol over page level allocator and protection flags
* use __vmalloc() instead.
*/
void *vmalloc(unsigned long size)
{
return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
}
EXPORT_SYMBOL(vmalloc);
#ifndef PAGE_KERNEL_EXEC
# define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif
/**
* vmalloc_exec - allocate virtually contiguous, executable memory
*
* @size: allocation size
*
* Kernel-internal function to allocate enough pages to cover @size
* the page level allocator and map them into contiguous and
* executable kernel virtual space.
*
* For tight cotrol over page level allocator and protection flags
* use __vmalloc() instead.
*/
void *vmalloc_exec(unsigned long size)
{
return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
}
/**
* vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
*
* @size: allocation size
*
* Allocate enough 32bit PA addressable pages to cover @size from the
* page level allocator and map them into contiguous kernel virtual space.
*/
void *vmalloc_32(unsigned long size)
{
return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
}
EXPORT_SYMBOL(vmalloc_32);
long vread(char *buf, char *addr, unsigned long count)
{
struct vm_struct *tmp;
char *vaddr, *buf_start = buf;
unsigned long n;
/* Don't allow overflow */
if ((unsigned long) addr + count < count)
count = -(unsigned long) addr;
read_lock(&vmlist_lock);
for (tmp = vmlist; tmp; tmp = tmp->next) {
vaddr = (char *) tmp->addr;
if (addr >= vaddr + tmp->size - PAGE_SIZE)
continue;
while (addr < vaddr) {
if (count == 0)
goto finished;
*buf = '\0';
buf++;
addr++;
count--;
}
n = vaddr + tmp->size - PAGE_SIZE - addr;
do {
if (count == 0)
goto finished;
*buf = *addr;
buf++;
addr++;
count--;
} while (--n > 0);
}
finished:
read_unlock(&vmlist_lock);
return buf - buf_start;
}
long vwrite(char *buf, char *addr, unsigned long count)
{
struct vm_struct *tmp;
char *vaddr, *buf_start = buf;
unsigned long n;
/* Don't allow overflow */
if ((unsigned long) addr + count < count)
count = -(unsigned long) addr;
read_lock(&vmlist_lock);
for (tmp = vmlist; tmp; tmp = tmp->next) {
vaddr = (char *) tmp->addr;
if (addr >= vaddr + tmp->size - PAGE_SIZE)
continue;
while (addr < vaddr) {
if (count == 0)
goto finished;
buf++;
addr++;
count--;
}
n = vaddr + tmp->size - PAGE_SIZE - addr;
do {
if (count == 0)
goto finished;
*addr = *buf;
buf++;
addr++;
count--;
} while (--n > 0);
}
finished:
read_unlock(&vmlist_lock);
return buf - buf_start;
}