Merge branch 'slab/next' into slab/for-linus

This commit is contained in:
Pekka Enberg 2013-05-07 09:19:47 +03:00
commit 69df2ac128
9 changed files with 782 additions and 916 deletions

View file

@ -184,7 +184,7 @@ static int show_stat(struct seq_file *p, void *v)
static int stat_open(struct inode *inode, struct file *file)
{
unsigned size = 1024 + 128 * num_possible_cpus();
size_t size = 1024 + 128 * num_possible_cpus();
char *buf;
struct seq_file *m;
int res;

View file

@ -1,45 +0,0 @@
#if (PAGE_SIZE == 4096)
CACHE(32)
#endif
CACHE(64)
#if L1_CACHE_BYTES < 64
CACHE(96)
#endif
CACHE(128)
#if L1_CACHE_BYTES < 128
CACHE(192)
#endif
CACHE(256)
CACHE(512)
CACHE(1024)
CACHE(2048)
CACHE(4096)
CACHE(8192)
CACHE(16384)
CACHE(32768)
CACHE(65536)
CACHE(131072)
#if KMALLOC_MAX_SIZE >= 262144
CACHE(262144)
#endif
#if KMALLOC_MAX_SIZE >= 524288
CACHE(524288)
#endif
#if KMALLOC_MAX_SIZE >= 1048576
CACHE(1048576)
#endif
#if KMALLOC_MAX_SIZE >= 2097152
CACHE(2097152)
#endif
#if KMALLOC_MAX_SIZE >= 4194304
CACHE(4194304)
#endif
#if KMALLOC_MAX_SIZE >= 8388608
CACHE(8388608)
#endif
#if KMALLOC_MAX_SIZE >= 16777216
CACHE(16777216)
#endif
#if KMALLOC_MAX_SIZE >= 33554432
CACHE(33554432)
#endif

View file

@ -94,29 +94,6 @@
#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \
(unsigned long)ZERO_SIZE_PTR)
/*
* Common fields provided in kmem_cache by all slab allocators
* This struct is either used directly by the allocator (SLOB)
* or the allocator must include definitions for all fields
* provided in kmem_cache_common in their definition of kmem_cache.
*
* Once we can do anonymous structs (C11 standard) we could put a
* anonymous struct definition in these allocators so that the
* separate allocations in the kmem_cache structure of SLAB and
* SLUB is no longer needed.
*/
#ifdef CONFIG_SLOB
struct kmem_cache {
unsigned int object_size;/* The original size of the object */
unsigned int size; /* The aligned/padded/added on size */
unsigned int align; /* Alignment as calculated */
unsigned long flags; /* Active flags on the slab */
const char *name; /* Slab name for sysfs */
int refcount; /* Use counter */
void (*ctor)(void *); /* Called on object slot creation */
struct list_head list; /* List of all slab caches on the system */
};
#endif
struct mem_cgroup;
/*
@ -148,7 +125,63 @@ void kmem_cache_free(struct kmem_cache *, void *);
(__flags), NULL)
/*
* The largest kmalloc size supported by the slab allocators is
* Common kmalloc functions provided by all allocators
*/
void * __must_check __krealloc(const void *, size_t, gfp_t);
void * __must_check krealloc(const void *, size_t, gfp_t);
void kfree(const void *);
void kzfree(const void *);
size_t ksize(const void *);
/*
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
* Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
*/
#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN
#define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN)
#else
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif
#ifdef CONFIG_SLOB
/*
* Common fields provided in kmem_cache by all slab allocators
* This struct is either used directly by the allocator (SLOB)
* or the allocator must include definitions for all fields
* provided in kmem_cache_common in their definition of kmem_cache.
*
* Once we can do anonymous structs (C11 standard) we could put a
* anonymous struct definition in these allocators so that the
* separate allocations in the kmem_cache structure of SLAB and
* SLUB is no longer needed.
*/
struct kmem_cache {
unsigned int object_size;/* The original size of the object */
unsigned int size; /* The aligned/padded/added on size */
unsigned int align; /* Alignment as calculated */
unsigned long flags; /* Active flags on the slab */
const char *name; /* Slab name for sysfs */
int refcount; /* Use counter */
void (*ctor)(void *); /* Called on object slot creation */
struct list_head list; /* List of all slab caches on the system */
};
#define KMALLOC_MAX_SIZE (1UL << 30)
#include <linux/slob_def.h>
#else /* CONFIG_SLOB */
/*
* Kmalloc array related definitions
*/
#ifdef CONFIG_SLAB
/*
* The largest kmalloc size supported by the SLAB allocators is
* 32 megabyte (2^25) or the maximum allocatable page order if that is
* less than 32 MB.
*
@ -158,21 +191,119 @@ void kmem_cache_free(struct kmem_cache *, void *);
*/
#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
(MAX_ORDER + PAGE_SHIFT - 1) : 25)
#define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 5
#endif
#else
/*
* SLUB allocates up to order 2 pages directly and otherwise
* passes the request to the page allocator.
*/
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 3
#endif
#endif
#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_HIGH)
#define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_HIGH - PAGE_SHIFT)
/* Maximum allocatable size */
#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX)
/* Maximum size for which we actually use a slab cache */
#define KMALLOC_MAX_CACHE_SIZE (1UL << KMALLOC_SHIFT_HIGH)
/* Maximum order allocatable via the slab allocagtor */
#define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_MAX - PAGE_SHIFT)
/*
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
* Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
* Kmalloc subsystem.
*/
#ifdef ARCH_DMA_MINALIGN
#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
#else
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#ifndef KMALLOC_MIN_SIZE
#define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
#endif
extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
#ifdef CONFIG_ZONE_DMA
extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
#endif
/*
* Figure out which kmalloc slab an allocation of a certain size
* belongs to.
* 0 = zero alloc
* 1 = 65 .. 96 bytes
* 2 = 120 .. 192 bytes
* n = 2^(n-1) .. 2^n -1
*/
static __always_inline int kmalloc_index(size_t size)
{
if (!size)
return 0;
if (size <= KMALLOC_MIN_SIZE)
return KMALLOC_SHIFT_LOW;
if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
return 1;
if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
return 2;
if (size <= 8) return 3;
if (size <= 16) return 4;
if (size <= 32) return 5;
if (size <= 64) return 6;
if (size <= 128) return 7;
if (size <= 256) return 8;
if (size <= 512) return 9;
if (size <= 1024) return 10;
if (size <= 2 * 1024) return 11;
if (size <= 4 * 1024) return 12;
if (size <= 8 * 1024) return 13;
if (size <= 16 * 1024) return 14;
if (size <= 32 * 1024) return 15;
if (size <= 64 * 1024) return 16;
if (size <= 128 * 1024) return 17;
if (size <= 256 * 1024) return 18;
if (size <= 512 * 1024) return 19;
if (size <= 1024 * 1024) return 20;
if (size <= 2 * 1024 * 1024) return 21;
if (size <= 4 * 1024 * 1024) return 22;
if (size <= 8 * 1024 * 1024) return 23;
if (size <= 16 * 1024 * 1024) return 24;
if (size <= 32 * 1024 * 1024) return 25;
if (size <= 64 * 1024 * 1024) return 26;
BUG();
/* Will never be reached. Needed because the compiler may complain */
return -1;
}
#ifdef CONFIG_SLAB
#include <linux/slab_def.h>
#elif defined(CONFIG_SLUB)
#include <linux/slub_def.h>
#else
#error "Unknown slab allocator"
#endif
/*
* Determine size used for the nth kmalloc cache.
* return size or 0 if a kmalloc cache for that
* size does not exist
*/
static __always_inline int kmalloc_size(int n)
{
if (n > 2)
return 1 << n;
if (n == 1 && KMALLOC_MIN_SIZE <= 32)
return 96;
if (n == 2 && KMALLOC_MIN_SIZE <= 64)
return 192;
return 0;
}
#endif /* !CONFIG_SLOB */
/*
* Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
* Intended for arches that get misalignment faults even for 64 bit integer
@ -224,42 +355,6 @@ struct seq_file;
int cache_show(struct kmem_cache *s, struct seq_file *m);
void print_slabinfo_header(struct seq_file *m);
/*
* Common kmalloc functions provided by all allocators
*/
void * __must_check __krealloc(const void *, size_t, gfp_t);
void * __must_check krealloc(const void *, size_t, gfp_t);
void kfree(const void *);
void kzfree(const void *);
size_t ksize(const void *);
/*
* Allocator specific definitions. These are mainly used to establish optimized
* ways to convert kmalloc() calls to kmem_cache_alloc() invocations by
* selecting the appropriate general cache at compile time.
*
* Allocators must define at least:
*
* kmem_cache_alloc()
* __kmalloc()
* kmalloc()
*
* Those wishing to support NUMA must also define:
*
* kmem_cache_alloc_node()
* kmalloc_node()
*
* See each allocator definition file for additional comments and
* implementation notes.
*/
#ifdef CONFIG_SLUB
#include <linux/slub_def.h>
#elif defined(CONFIG_SLOB)
#include <linux/slob_def.h>
#else
#include <linux/slab_def.h>
#endif
/**
* kmalloc_array - allocate memory for an array.
* @n: number of elements.

View file

@ -11,8 +11,6 @@
*/
#include <linux/init.h>
#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
#include <linux/compiler.h>
/*
@ -97,23 +95,13 @@ struct kmem_cache {
* pointer for each node since "nodelists" uses the remainder of
* available pointers.
*/
struct kmem_list3 **nodelists;
struct kmem_cache_node **node;
struct array_cache *array[NR_CPUS + MAX_NUMNODES];
/*
* Do not add fields after array[]
*/
};
/* Size description struct for general caches. */
struct cache_sizes {
size_t cs_size;
struct kmem_cache *cs_cachep;
#ifdef CONFIG_ZONE_DMA
struct kmem_cache *cs_dmacachep;
#endif
};
extern struct cache_sizes malloc_sizes[];
void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
void *__kmalloc(size_t size, gfp_t flags);
@ -133,26 +121,22 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
void *ret;
if (__builtin_constant_p(size)) {
int i = 0;
int i;
if (!size)
return ZERO_SIZE_PTR;
#define CACHE(x) \
if (size <= x) \
goto found; \
else \
i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
return NULL;
found:
if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
return NULL;
i = kmalloc_index(size);
#ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA)
cachep = malloc_sizes[i].cs_dmacachep;
cachep = kmalloc_dma_caches[i];
else
#endif
cachep = malloc_sizes[i].cs_cachep;
cachep = kmalloc_caches[i];
ret = kmem_cache_alloc_trace(cachep, flags, size);
@ -186,26 +170,22 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
struct kmem_cache *cachep;
if (__builtin_constant_p(size)) {
int i = 0;
int i;
if (!size)
return ZERO_SIZE_PTR;
#define CACHE(x) \
if (size <= x) \
goto found; \
else \
i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
return NULL;
found:
if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
return NULL;
i = kmalloc_index(size);
#ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA)
cachep = malloc_sizes[i].cs_dmacachep;
cachep = kmalloc_dma_caches[i];
else
#endif
cachep = malloc_sizes[i].cs_cachep;
cachep = kmalloc_caches[i];
return kmem_cache_alloc_node_trace(cachep, flags, node, size);
}

View file

@ -53,17 +53,6 @@ struct kmem_cache_cpu {
#endif
};
struct kmem_cache_node {
spinlock_t list_lock; /* Protect partial list and nr_partial */
unsigned long nr_partial;
struct list_head partial;
#ifdef CONFIG_SLUB_DEBUG
atomic_long_t nr_slabs;
atomic_long_t total_objects;
struct list_head full;
#endif
};
/*
* Word size structure that can be atomically updated or read and that
* contains both the order and the number of objects that a slab of the
@ -115,111 +104,6 @@ struct kmem_cache {
struct kmem_cache_node *node[MAX_NUMNODES];
};
/*
* Kmalloc subsystem.
*/
#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN
#else
#define KMALLOC_MIN_SIZE 8
#endif
#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
/*
* Maximum kmalloc object size handled by SLUB. Larger object allocations
* are passed through to the page allocator. The page allocator "fastpath"
* is relatively slow so we need this value sufficiently high so that
* performance critical objects are allocated through the SLUB fastpath.
*
* This should be dropped to PAGE_SIZE / 2 once the page allocator
* "fastpath" becomes competitive with the slab allocator fastpaths.
*/
#define SLUB_MAX_SIZE (2 * PAGE_SIZE)
#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2)
#ifdef CONFIG_ZONE_DMA
#define SLUB_DMA __GFP_DMA
#else
/* Disable DMA functionality */
#define SLUB_DMA (__force gfp_t)0
#endif
/*
* We keep the general caches in an array of slab caches that are used for
* 2^x bytes of allocations.
*/
extern struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
/*
* Sorry that the following has to be that ugly but some versions of GCC
* have trouble with constant propagation and loops.
*/
static __always_inline int kmalloc_index(size_t size)
{
if (!size)
return 0;
if (size <= KMALLOC_MIN_SIZE)
return KMALLOC_SHIFT_LOW;
if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
return 1;
if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
return 2;
if (size <= 8) return 3;
if (size <= 16) return 4;
if (size <= 32) return 5;
if (size <= 64) return 6;
if (size <= 128) return 7;
if (size <= 256) return 8;
if (size <= 512) return 9;
if (size <= 1024) return 10;
if (size <= 2 * 1024) return 11;
if (size <= 4 * 1024) return 12;
/*
* The following is only needed to support architectures with a larger page
* size than 4k. We need to support 2 * PAGE_SIZE here. So for a 64k page
* size we would have to go up to 128k.
*/
if (size <= 8 * 1024) return 13;
if (size <= 16 * 1024) return 14;
if (size <= 32 * 1024) return 15;
if (size <= 64 * 1024) return 16;
if (size <= 128 * 1024) return 17;
if (size <= 256 * 1024) return 18;
if (size <= 512 * 1024) return 19;
if (size <= 1024 * 1024) return 20;
if (size <= 2 * 1024 * 1024) return 21;
BUG();
return -1; /* Will never be reached */
/*
* What we really wanted to do and cannot do because of compiler issues is:
* int i;
* for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
* if (size <= (1 << i))
* return i;
*/
}
/*
* Find the slab cache for a given combination of allocation flags and size.
*
* This ought to end up with a global pointer to the right cache
* in kmalloc_caches.
*/
static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
{
int index = kmalloc_index(size);
if (index == 0)
return NULL;
return kmalloc_caches[index];
}
void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
void *__kmalloc(size_t size, gfp_t flags);
@ -274,16 +158,17 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size)) {
if (size > SLUB_MAX_SIZE)
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large(size, flags);
if (!(flags & SLUB_DMA)) {
struct kmem_cache *s = kmalloc_slab(size);
if (!(flags & GFP_DMA)) {
int index = kmalloc_index(size);
if (!s)
if (!index)
return ZERO_SIZE_PTR;
return kmem_cache_alloc_trace(s, flags, size);
return kmem_cache_alloc_trace(kmalloc_caches[index],
flags, size);
}
}
return __kmalloc(size, flags);
@ -310,13 +195,14 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
if (__builtin_constant_p(size) &&
size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) {
struct kmem_cache *s = kmalloc_slab(size);
size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) {
int index = kmalloc_index(size);
if (!s)
if (!index)
return ZERO_SIZE_PTR;
return kmem_cache_alloc_node_trace(s, flags, node, size);
return kmem_cache_alloc_node_trace(kmalloc_caches[index],
flags, node, size);
}
return __kmalloc_node(size, flags, node);
}

790
mm/slab.c

File diff suppressed because it is too large Load diff

View file

@ -16,7 +16,7 @@ enum slab_state {
DOWN, /* No slab functionality yet */
PARTIAL, /* SLUB: kmem_cache_node available */
PARTIAL_ARRAYCACHE, /* SLAB: kmalloc size for arraycache available */
PARTIAL_L3, /* SLAB: kmalloc size for l3 struct available */
PARTIAL_NODE, /* SLAB: kmalloc size for node struct available */
UP, /* Slab caches usable but not all extras yet */
FULL /* Everything is working */
};
@ -35,6 +35,15 @@ extern struct kmem_cache *kmem_cache;
unsigned long calculate_alignment(unsigned long flags,
unsigned long align, unsigned long size);
#ifndef CONFIG_SLOB
/* Kmalloc array related functions */
void create_kmalloc_caches(unsigned long);
/* Find the kmalloc slab corresponding for a certain size */
struct kmem_cache *kmalloc_slab(size_t, gfp_t);
#endif
/* Functions provided by the slab allocators */
extern int __kmem_cache_create(struct kmem_cache *, unsigned long flags);
@ -230,3 +239,35 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
return s;
}
#endif
/*
* The slab lists for all objects.
*/
struct kmem_cache_node {
spinlock_t list_lock;
#ifdef CONFIG_SLAB
struct list_head slabs_partial; /* partial list first, better asm code */
struct list_head slabs_full;
struct list_head slabs_free;
unsigned long free_objects;
unsigned int free_limit;
unsigned int colour_next; /* Per-node cache coloring */
struct array_cache *shared; /* shared per node */
struct array_cache **alien; /* on other nodes */
unsigned long next_reap; /* updated without locking */
int free_touched; /* updated without locking */
#endif
#ifdef CONFIG_SLUB
unsigned long nr_partial;
struct list_head partial;
#ifdef CONFIG_SLUB_DEBUG
atomic_long_t nr_slabs;
atomic_long_t total_objects;
struct list_head full;
#endif
#endif
};

View file

@ -299,7 +299,7 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz
err = __kmem_cache_create(s, flags);
if (err)
panic("Creation of kmalloc slab %s size=%zd failed. Reason %d\n",
panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
name, size, err);
s->refcount = -1; /* Exempt from merging for now */
@ -319,6 +319,178 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
return s;
}
struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_caches);
#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif
/*
* Conversion table for small slabs sizes / 8 to the index in the
* kmalloc array. This is necessary for slabs < 192 since we have non power
* of two cache sizes there. The size of larger slabs can be determined using
* fls.
*/
static s8 size_index[24] = {
3, /* 8 */
4, /* 16 */
5, /* 24 */
5, /* 32 */
6, /* 40 */
6, /* 48 */
6, /* 56 */
6, /* 64 */
1, /* 72 */
1, /* 80 */
1, /* 88 */
1, /* 96 */
7, /* 104 */
7, /* 112 */
7, /* 120 */
7, /* 128 */
2, /* 136 */
2, /* 144 */
2, /* 152 */
2, /* 160 */
2, /* 168 */
2, /* 176 */
2, /* 184 */
2 /* 192 */
};
static inline int size_index_elem(size_t bytes)
{
return (bytes - 1) / 8;
}
/*
* Find the kmem_cache structure that serves a given size of
* allocation
*/
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
int index;
if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
return NULL;
if (size <= 192) {
if (!size)
return ZERO_SIZE_PTR;
index = size_index[size_index_elem(size)];
} else
index = fls(size - 1);
#ifdef CONFIG_ZONE_DMA
if (unlikely((flags & GFP_DMA)))
return kmalloc_dma_caches[index];
#endif
return kmalloc_caches[index];
}
/*
* Create the kmalloc array. Some of the regular kmalloc arrays
* may already have been created because they were needed to
* enable allocations for slab creation.
*/
void __init create_kmalloc_caches(unsigned long flags)
{
int i;
/*
* Patch up the size_index table if we have strange large alignment
* requirements for the kmalloc array. This is only the case for
* MIPS it seems. The standard arches will not generate any code here.
*
* Largest permitted alignment is 256 bytes due to the way we
* handle the index determination for the smaller caches.
*
* Make sure that nothing crazy happens if someone starts tinkering
* around with ARCH_KMALLOC_MINALIGN
*/
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
int elem = size_index_elem(i);
if (elem >= ARRAY_SIZE(size_index))
break;
size_index[elem] = KMALLOC_SHIFT_LOW;
}
if (KMALLOC_MIN_SIZE >= 64) {
/*
* The 96 byte size cache is not used if the alignment
* is 64 byte.
*/
for (i = 64 + 8; i <= 96; i += 8)
size_index[size_index_elem(i)] = 7;
}
if (KMALLOC_MIN_SIZE >= 128) {
/*
* The 192 byte sized cache is not used if the alignment
* is 128 byte. Redirect kmalloc to use the 256 byte cache
* instead.
*/
for (i = 128 + 8; i <= 192; i += 8)
size_index[size_index_elem(i)] = 8;
}
for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
if (!kmalloc_caches[i]) {
kmalloc_caches[i] = create_kmalloc_cache(NULL,
1 << i, flags);
/*
* Caches that are not of the two-to-the-power-of size.
* These have to be created immediately after the
* earlier power of two caches
*/
if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags);
if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags);
}
}
/* Kmalloc array is now usable */
slab_state = UP;
for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
struct kmem_cache *s = kmalloc_caches[i];
char *n;
if (s) {
n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i));
BUG_ON(!n);
s->name = n;
}
}
#ifdef CONFIG_ZONE_DMA
for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
struct kmem_cache *s = kmalloc_caches[i];
if (s) {
int size = kmalloc_size(i);
char *n = kasprintf(GFP_NOWAIT,
"dma-kmalloc-%d", size);
BUG_ON(!n);
kmalloc_dma_caches[i] = create_kmalloc_cache(n,
size, SLAB_CACHE_DMA | flags);
}
}
#endif
}
#endif /* !CONFIG_SLOB */

221
mm/slub.c
View file

@ -1005,7 +1005,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
* dilemma by deferring the increment of the count during
* bootstrap (see early_kmem_cache_node_alloc).
*/
if (n) {
if (likely(n)) {
atomic_long_inc(&n->nr_slabs);
atomic_long_add(objects, &n->total_objects);
}
@ -1493,7 +1493,7 @@ static inline void remove_partial(struct kmem_cache_node *n,
*/
static inline void *acquire_slab(struct kmem_cache *s,
struct kmem_cache_node *n, struct page *page,
int mode)
int mode, int *objects)
{
void *freelist;
unsigned long counters;
@ -1507,6 +1507,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
freelist = page->freelist;
counters = page->counters;
new.counters = counters;
*objects = new.objects - new.inuse;
if (mode) {
new.inuse = page->objects;
new.freelist = NULL;
@ -1528,7 +1529,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
return freelist;
}
static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
/*
@ -1539,6 +1540,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
{
struct page *page, *page2;
void *object = NULL;
int available = 0;
int objects;
/*
* Racy check. If we mistakenly see no partial slabs then we
@ -1552,22 +1555,21 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
spin_lock(&n->list_lock);
list_for_each_entry_safe(page, page2, &n->partial, lru) {
void *t;
int available;
if (!pfmemalloc_match(page, flags))
continue;
t = acquire_slab(s, n, page, object == NULL);
t = acquire_slab(s, n, page, object == NULL, &objects);
if (!t)
break;
available += objects;
if (!object) {
c->page = page;
stat(s, ALLOC_FROM_PARTIAL);
object = t;
available = page->objects - page->inuse;
} else {
available = put_cpu_partial(s, page, 0);
put_cpu_partial(s, page, 0);
stat(s, CPU_PARTIAL_NODE);
}
if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
@ -1946,7 +1948,7 @@ static void unfreeze_partials(struct kmem_cache *s,
* If we did not find a slot then simply move all the partials to the
* per node partial list.
*/
static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
{
struct page *oldpage;
int pages;
@ -1984,7 +1986,6 @@ static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
page->next = oldpage;
} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
return pobjects;
}
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
@ -2041,7 +2042,7 @@ static void flush_all(struct kmem_cache *s)
static inline int node_match(struct page *page, int node)
{
#ifdef CONFIG_NUMA
if (node != NUMA_NO_NODE && page_to_nid(page) != node)
if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
return 0;
#endif
return 1;
@ -2331,13 +2332,18 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
s = memcg_kmem_get_cache(s, gfpflags);
redo:
/*
* Must read kmem_cache cpu data via this cpu ptr. Preemption is
* enabled. We may switch back and forth between cpus while
* reading from one cpu area. That does not matter as long
* as we end up on the original cpu again when doing the cmpxchg.
*
* Preemption is disabled for the retrieval of the tid because that
* must occur from the current processor. We cannot allow rescheduling
* on a different processor between the determination of the pointer
* and the retrieval of the tid.
*/
preempt_disable();
c = __this_cpu_ptr(s->cpu_slab);
/*
@ -2347,7 +2353,7 @@ redo:
* linked list in between.
*/
tid = c->tid;
barrier();
preempt_enable();
object = c->freelist;
page = c->page;
@ -2594,10 +2600,11 @@ redo:
* data is retrieved via this pointer. If we are on the same cpu
* during the cmpxchg then the free will succedd.
*/
preempt_disable();
c = __this_cpu_ptr(s->cpu_slab);
tid = c->tid;
barrier();
preempt_enable();
if (likely(page == c->page)) {
set_freepointer(s, object, c->freelist);
@ -2775,7 +2782,7 @@ init_kmem_cache_node(struct kmem_cache_node *n)
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{
BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
/*
* Must align to double word boundary for the double cmpxchg
@ -2982,7 +2989,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
s->allocflags |= __GFP_COMP;
if (s->flags & SLAB_CACHE_DMA)
s->allocflags |= SLUB_DMA;
s->allocflags |= GFP_DMA;
if (s->flags & SLAB_RECLAIM_ACCOUNT)
s->allocflags |= __GFP_RECLAIMABLE;
@ -3174,13 +3181,6 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
* Kmalloc subsystem
*******************************************************************/
struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
EXPORT_SYMBOL(kmalloc_caches);
#ifdef CONFIG_ZONE_DMA
static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
#endif
static int __init setup_slub_min_order(char *str)
{
get_option(&str, &slub_min_order);
@ -3217,73 +3217,15 @@ static int __init setup_slub_nomerge(char *str)
__setup("slub_nomerge", setup_slub_nomerge);
/*
* Conversion table for small slabs sizes / 8 to the index in the
* kmalloc array. This is necessary for slabs < 192 since we have non power
* of two cache sizes there. The size of larger slabs can be determined using
* fls.
*/
static s8 size_index[24] = {
3, /* 8 */
4, /* 16 */
5, /* 24 */
5, /* 32 */
6, /* 40 */
6, /* 48 */
6, /* 56 */
6, /* 64 */
1, /* 72 */
1, /* 80 */
1, /* 88 */
1, /* 96 */
7, /* 104 */
7, /* 112 */
7, /* 120 */
7, /* 128 */
2, /* 136 */
2, /* 144 */
2, /* 152 */
2, /* 160 */
2, /* 168 */
2, /* 176 */
2, /* 184 */
2 /* 192 */
};
static inline int size_index_elem(size_t bytes)
{
return (bytes - 1) / 8;
}
static struct kmem_cache *get_slab(size_t size, gfp_t flags)
{
int index;
if (size <= 192) {
if (!size)
return ZERO_SIZE_PTR;
index = size_index[size_index_elem(size)];
} else
index = fls(size - 1);
#ifdef CONFIG_ZONE_DMA
if (unlikely((flags & SLUB_DMA)))
return kmalloc_dma_caches[index];
#endif
return kmalloc_caches[index];
}
void *__kmalloc(size_t size, gfp_t flags)
{
struct kmem_cache *s;
void *ret;
if (unlikely(size > SLUB_MAX_SIZE))
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
return kmalloc_large(size, flags);
s = get_slab(size, flags);
s = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
@ -3316,7 +3258,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
struct kmem_cache *s;
void *ret;
if (unlikely(size > SLUB_MAX_SIZE)) {
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, flags, node);
trace_kmalloc_node(_RET_IP_, ret,
@ -3326,7 +3268,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
return ret;
}
s = get_slab(size, flags);
s = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
@ -3617,6 +3559,12 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
memcpy(s, static_cache, kmem_cache->object_size);
/*
* This runs very early, and only the boot processor is supposed to be
* up. Even if it weren't true, IRQs are not up so we couldn't fire
* IPIs around.
*/
__flush_cpu_slab(s, smp_processor_id());
for_each_node_state(node, N_NORMAL_MEMORY) {
struct kmem_cache_node *n = get_node(s, node);
struct page *p;
@ -3639,8 +3587,6 @@ void __init kmem_cache_init(void)
{
static __initdata struct kmem_cache boot_kmem_cache,
boot_kmem_cache_node;
int i;
int caches = 2;
if (debug_guardpage_minorder())
slub_max_order = 0;
@ -3671,103 +3617,16 @@ void __init kmem_cache_init(void)
kmem_cache_node = bootstrap(&boot_kmem_cache_node);
/* Now we can use the kmem_cache to allocate kmalloc slabs */
/*
* Patch up the size_index table if we have strange large alignment
* requirements for the kmalloc array. This is only the case for
* MIPS it seems. The standard arches will not generate any code here.
*
* Largest permitted alignment is 256 bytes due to the way we
* handle the index determination for the smaller caches.
*
* Make sure that nothing crazy happens if someone starts tinkering
* around with ARCH_KMALLOC_MINALIGN
*/
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
int elem = size_index_elem(i);
if (elem >= ARRAY_SIZE(size_index))
break;
size_index[elem] = KMALLOC_SHIFT_LOW;
}
if (KMALLOC_MIN_SIZE == 64) {
/*
* The 96 byte size cache is not used if the alignment
* is 64 byte.
*/
for (i = 64 + 8; i <= 96; i += 8)
size_index[size_index_elem(i)] = 7;
} else if (KMALLOC_MIN_SIZE == 128) {
/*
* The 192 byte sized cache is not used if the alignment
* is 128 byte. Redirect kmalloc to use the 256 byte cache
* instead.
*/
for (i = 128 + 8; i <= 192; i += 8)
size_index[size_index_elem(i)] = 8;
}
/* Caches that are not of the two-to-the-power-of size */
if (KMALLOC_MIN_SIZE <= 32) {
kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
caches++;
}
if (KMALLOC_MIN_SIZE <= 64) {
kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
caches++;
}
for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
caches++;
}
slab_state = UP;
/* Provide the correct kmalloc names now that the caches are up */
if (KMALLOC_MIN_SIZE <= 32) {
kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
BUG_ON(!kmalloc_caches[1]->name);
}
if (KMALLOC_MIN_SIZE <= 64) {
kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
BUG_ON(!kmalloc_caches[2]->name);
}
for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
BUG_ON(!s);
kmalloc_caches[i]->name = s;
}
create_kmalloc_caches(0);
#ifdef CONFIG_SMP
register_cpu_notifier(&slab_notifier);
#endif
#ifdef CONFIG_ZONE_DMA
for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
struct kmem_cache *s = kmalloc_caches[i];
if (s && s->size) {
char *name = kasprintf(GFP_NOWAIT,
"dma-kmalloc-%d", s->object_size);
BUG_ON(!name);
kmalloc_dma_caches[i] = create_kmalloc_cache(name,
s->object_size, SLAB_CACHE_DMA);
}
}
#endif
printk(KERN_INFO
"SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
"SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d,"
" CPUs=%d, Nodes=%d\n",
caches, cache_line_size(),
cache_line_size(),
slub_min_order, slub_max_order, slub_min_objects,
nr_cpu_ids, nr_node_ids);
}
@ -3930,10 +3789,10 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
struct kmem_cache *s;
void *ret;
if (unlikely(size > SLUB_MAX_SIZE))
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
return kmalloc_large(size, gfpflags);
s = get_slab(size, gfpflags);
s = kmalloc_slab(size, gfpflags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
@ -3953,7 +3812,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
struct kmem_cache *s;
void *ret;
if (unlikely(size > SLUB_MAX_SIZE)) {
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, gfpflags, node);
trace_kmalloc_node(caller, ret,
@ -3963,7 +3822,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
return ret;
}
s = get_slab(size, gfpflags);
s = kmalloc_slab(size, gfpflags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
@ -4312,7 +4171,7 @@ static void resiliency_test(void)
{
u8 *p;
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10);
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
printk(KERN_ERR "SLUB resiliency testing\n");
printk(KERN_ERR "-----------------------\n");