From fa5ec8a1f66f3c2a3af723abcf8085509c9ee682 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 7 Jul 2009 00:14:14 -0700 Subject: [PATCH 1/9] slub: add option to disable higher order debugging slabs When debugging is enabled, slub requires that additional metadata be stored in slabs for certain options: SLAB_RED_ZONE, SLAB_POISON, and SLAB_STORE_USER. Consequently, it may require that the minimum possible slab order needed to allocate a single object be greater when using these options. The most notable example is for objects that are PAGE_SIZE bytes in size. Higher minimum slab orders may cause page allocation failures when oom or under heavy fragmentation. This patch adds a new slub_debug option, which disables debugging by default for caches that would have resulted in higher minimum orders: slub_debug=O When this option is used on systems with 4K pages, kmalloc-4096, for example, will not have debugging enabled by default even if CONFIG_SLUB_DEBUG_ON is defined because it would have resulted in a order-1 minimum slab order. Reported-by: Larry Finger Tested-by: Larry Finger Cc: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Pekka Enberg --- Documentation/vm/slub.txt | 10 ++++++++++ mm/slub.c | 41 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt index bb1f5c6e28b3..510917ff59ed 100644 --- a/Documentation/vm/slub.txt +++ b/Documentation/vm/slub.txt @@ -41,6 +41,8 @@ Possible debug options are P Poisoning (object and padding) U User tracking (free and alloc) T Trace (please only use on single slabs) + O Switch debugging off for caches that would have + caused higher minimum slab orders - Switch all debugging off (useful if the kernel is configured with CONFIG_SLUB_DEBUG_ON) @@ -59,6 +61,14 @@ to the dentry cache with slub_debug=F,dentry +Debugging options may require the minimum possible slab order to increase as +a result of storing the metadata (for example, caches with PAGE_SIZE object +sizes). This has a higher liklihood of resulting in slab allocation errors +in low memory situations or if there's high fragmentation of memory. To +switch off debugging for such caches by default, use + + slub_debug=O + In case you forgot to enable debugging on the kernel command line: It is possible to enable debugging manually when the kernel is up. Look at the contents of: diff --git a/mm/slub.c b/mm/slub.c index a9201d83178b..466089cd5deb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -141,6 +141,13 @@ #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \ SLAB_POISON | SLAB_STORE_USER) +/* + * Debugging flags that require metadata to be stored in the slab, up to + * DEBUG_SIZE in size. + */ +#define DEBUG_SIZE_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) +#define DEBUG_SIZE (3 * sizeof(void *) + 2 * sizeof(struct track)) + /* * Set of flags that will prevent slab merging */ @@ -326,6 +333,7 @@ static int slub_debug; #endif static char *slub_debug_slabs; +static int disable_higher_order_debug; /* * Object debugging @@ -977,6 +985,15 @@ static int __init setup_slub_debug(char *str) */ goto check_slabs; + if (tolower(*str) == 'o') { + /* + * Avoid enabling debugging on caches if its minimum order + * would increase as a result. + */ + disable_higher_order_debug = 1; + goto out; + } + slub_debug = 0; if (*str == '-') /* @@ -1023,13 +1040,27 @@ static unsigned long kmem_cache_flags(unsigned long objsize, unsigned long flags, const char *name, void (*ctor)(void *)) { + int debug_flags = slub_debug; + /* * Enable debugging if selected on the kernel commandline. */ - if (slub_debug && (!slub_debug_slabs || - strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0)) - flags |= slub_debug; + if (debug_flags) { + if (slub_debug_slabs && + strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))) + goto out; + /* + * Disable debugging that increases slab size if the minimum + * slab order would have increased as a result. + */ + if (disable_higher_order_debug && + get_order(objsize + DEBUG_SIZE) > get_order(objsize)) + debug_flags &= ~DEBUG_SIZE_FLAGS; + + flags |= debug_flags; + } +out: return flags; } #else @@ -1561,6 +1592,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) "default order: %d, min order: %d\n", s->name, s->objsize, s->size, oo_order(s->oo), oo_order(s->min)); + if (oo_order(s->min) > get_order(s->objsize)) + printk(KERN_WARNING " %s debugging increased min order, use " + "slub_debug=O to disable.\n", s->name); + for_each_online_node(node) { struct kmem_cache_node *n = get_node(s, node); unsigned long nr_slabs; From 3de472138a138008b534d9587593ba83390e330a Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 27 Jul 2009 18:30:35 -0700 Subject: [PATCH 2/9] slub: use size and objsize orders to disable debug flags This patch moves the masking of debugging flags which increase a cache's min order due to metadata when `slub_debug=O' is used from kmem_cache_flags() to kmem_cache_open(). Instead of defining the maximum metadata size increase in a preprocessor macro, this approach uses the cache's ->size and ->objsize members to determine if the min order increased due to debugging options. If so, the flags specified in the more appropriately named DEBUG_METADATA_FLAGS are masked off. This approach was suggested by Christoph Lameter . Cc: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Pekka Enberg --- mm/slub.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 466089cd5deb..a465c0a09fb5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -142,11 +142,11 @@ SLAB_POISON | SLAB_STORE_USER) /* - * Debugging flags that require metadata to be stored in the slab, up to - * DEBUG_SIZE in size. + * Debugging flags that require metadata to be stored in the slab. These get + * disabled when slub_debug=O is used and a cache's min order increases with + * metadata. */ -#define DEBUG_SIZE_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) -#define DEBUG_SIZE (3 * sizeof(void *) + 2 * sizeof(struct track)) +#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) /* * Set of flags that will prevent slab merging @@ -1040,27 +1040,13 @@ static unsigned long kmem_cache_flags(unsigned long objsize, unsigned long flags, const char *name, void (*ctor)(void *)) { - int debug_flags = slub_debug; - /* * Enable debugging if selected on the kernel commandline. */ - if (debug_flags) { - if (slub_debug_slabs && - strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))) - goto out; + if (slub_debug && (!slub_debug_slabs || + !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))) + flags |= slub_debug; - /* - * Disable debugging that increases slab size if the minimum - * slab order would have increased as a result. - */ - if (disable_higher_order_debug && - get_order(objsize + DEBUG_SIZE) > get_order(objsize)) - debug_flags &= ~DEBUG_SIZE_FLAGS; - - flags |= debug_flags; - } -out: return flags; } #else @@ -2488,6 +2474,18 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, if (!calculate_sizes(s, -1)) goto error; + if (disable_higher_order_debug) { + /* + * Disable debugging flags that store metadata if the min slab + * order increased. + */ + if (get_order(s->size) > get_order(s->objsize)) { + s->flags &= ~DEBUG_METADATA_FLAGS; + s->offset = 0; + if (!calculate_sizes(s, -1)) + goto error; + } + } /* * The larger the object size is, the more pages we want on the partial From dcb0ce1bdf39581bcd0cffc3d487fb20667977cd Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Thu, 30 Jul 2009 11:28:11 +0800 Subject: [PATCH 3/9] slub: change kmem_cache->align to record the real alignment kmem_cache->align records the original align parameter value specified by users. Function calculate_alignment might change it based on cache line size. So change kmem_cache->align correspondingly. Signed-off-by: Zhang Yanmin Signed-off-by: Pekka Enberg --- mm/slub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/slub.c b/mm/slub.c index a465c0a09fb5..801fe4b9b68d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2422,6 +2422,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * on bootup. */ align = calculate_alignment(flags, align, s->objsize); + s->align = align; /* * SLUB stores one object immediately after another beginning from From bbff2e433e80fae72c8d00d482927d52ec19ba33 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 6 Aug 2009 11:36:25 +0300 Subject: [PATCH 4/9] slab: remove duplicate kmem_cache_init_late() declarations kmem_cache_init_late() has been declared in slab.h CC: Nick Piggin CC: Matt Mackall CC: Christoph Lameter Signed-off-by: Wu Fengguang Signed-off-by: Pekka Enberg --- include/linux/slob_def.h | 5 ----- include/linux/slub_def.h | 2 -- mm/slob.c | 5 +++++ 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h index bb5368df4be8..0ec00b39d006 100644 --- a/include/linux/slob_def.h +++ b/include/linux/slob_def.h @@ -34,9 +34,4 @@ static __always_inline void *__kmalloc(size_t size, gfp_t flags) return kmalloc(size, flags); } -static inline void kmem_cache_init_late(void) -{ - /* Nothing to do */ -} - #endif /* __LINUX_SLOB_DEF_H */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index c1c862b1d01a..76c831693616 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -304,6 +304,4 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) } #endif -void __init kmem_cache_init_late(void); - #endif /* _LINUX_SLUB_DEF_H */ diff --git a/mm/slob.c b/mm/slob.c index 9641da3d5e58..837ebd64cc34 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -692,3 +692,8 @@ void __init kmem_cache_init(void) { slob_ready = 1; } + +void __init kmem_cache_init_late(void) +{ + /* Nothing to do */ +} From cf5d11317e8f2671d3115622aec76274a40f4fc2 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 18 Aug 2009 19:11:40 +0300 Subject: [PATCH 5/9] SLUB: Drop write permission to /proc/slabinfo SLUB does not support writes to /proc/slabinfo so there should not be write permission to do that either. Signed-off-by: WANG Cong Signed-off-by: Pekka Enberg --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 801fe4b9b68d..e16c9fb1f48b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4758,7 +4758,7 @@ static const struct file_operations proc_slabinfo_operations = { static int __init slab_proc_init(void) { - proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); + proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations); return 0; } module_init(slab_proc_init); From 5086c389cb897c7ad66c1cacd1abb5ffebaa74b2 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Wed, 19 Aug 2009 21:44:13 +0300 Subject: [PATCH 6/9] SLUB: Fix some coding style issues Signed-off-by: WANG Cong Signed-off-by: Pekka Enberg --- mm/slub.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index b9f1491a58a1..e4c3e3a6028f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1109,8 +1109,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) } if (kmemcheck_enabled - && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) - { + && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { int pages = 1 << oo_order(oo); kmemcheck_alloc_shadow(page, oo_order(oo), flags, node); @@ -2001,7 +2000,7 @@ static inline int calculate_order(int size) return order; fraction /= 2; } - min_objects --; + min_objects--; } /* From acdfcd04d9df7d084ff752f82afad6ed4ad5f363 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Fri, 28 Aug 2009 14:28:54 +0300 Subject: [PATCH 7/9] SLUB: fix ARCH_KMALLOC_MINALIGN cases 64 and 256 If the minalign is 64 bytes, then the 96 byte cache should not be created because it would conflict with the 128 byte cache. If the minalign is 256 bytes, patching the size_index table should not result in a buffer overrun. The calculation "(i - 1) / 8" used to access size_index[] is moved to a separate function as suggested by Christoph Lameter. Acked-by: Christoph Lameter Signed-off-by: Aaro Koskinen Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 6 ++---- mm/slub.c | 30 ++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 4dcbc2c71491..aa5d4a69d461 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -152,12 +152,10 @@ static __always_inline int kmalloc_index(size_t size) if (size <= KMALLOC_MIN_SIZE) return KMALLOC_SHIFT_LOW; -#if KMALLOC_MIN_SIZE <= 64 - if (size > 64 && size <= 96) + if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96) return 1; - if (size > 128 && size <= 192) + if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192) return 2; -#endif if (size <= 8) return 3; if (size <= 16) return 4; if (size <= 32) return 5; diff --git a/mm/slub.c b/mm/slub.c index e16c9fb1f48b..be493bd63c31 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2825,6 +2825,11 @@ static s8 size_index[24] = { 2 /* 192 */ }; +static inline int size_index_elem(size_t bytes) +{ + return (bytes - 1) / 8; +} + static struct kmem_cache *get_slab(size_t size, gfp_t flags) { int index; @@ -2833,7 +2838,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) if (!size) return ZERO_SIZE_PTR; - index = size_index[(size - 1) / 8]; + index = size_index[size_index_elem(size)]; } else index = fls(size - 1); @@ -3188,10 +3193,12 @@ void __init kmem_cache_init(void) slab_state = PARTIAL; /* Caches that are not of the two-to-the-power-of size */ - if (KMALLOC_MIN_SIZE <= 64) { + if (KMALLOC_MIN_SIZE <= 32) { create_kmalloc_cache(&kmalloc_caches[1], "kmalloc-96", 96, GFP_NOWAIT); caches++; + } + if (KMALLOC_MIN_SIZE <= 64) { create_kmalloc_cache(&kmalloc_caches[2], "kmalloc-192", 192, GFP_NOWAIT); caches++; @@ -3218,17 +3225,28 @@ void __init kmem_cache_init(void) BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); - for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) - size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW; + for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { + int elem = size_index_elem(i); + if (elem >= ARRAY_SIZE(size_index)) + break; + size_index[elem] = KMALLOC_SHIFT_LOW; + } - if (KMALLOC_MIN_SIZE == 128) { + if (KMALLOC_MIN_SIZE == 64) { + /* + * The 96 byte size cache is not used if the alignment + * is 64 byte. + */ + for (i = 64 + 8; i <= 96; i += 8) + size_index[size_index_elem(i)] = 7; + } else if (KMALLOC_MIN_SIZE == 128) { /* * The 192 byte sized cache is not used if the alignment * is 128 byte. Redirect kmalloc to use the 256 byte cache * instead. */ for (i = 128 + 8; i <= 192; i += 8) - size_index[(i - 1) / 8] = 8; + size_index[size_index_elem(i)] = 8; } slab_state = UP; From 5788d8ad6c113c589eeaaa48a173adbbe6b1cb3d Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Wed, 22 Jul 2009 11:28:53 +0800 Subject: [PATCH 8/9] slub: release kobject if sysfs_create_group failed in sysfs_slab_add When CONFIG_SLUB_DEBUG is enabled, sysfs_slab_add should unlink and put the kobject if sysfs_create_group failed. Otherwise, sysfs_slab_add returns error then free kmem_cache s, thus memory of s->kobj is leaked. Acked-by: Christoph Lameter Signed-off-by: Xiaotian Feng Signed-off-by: Pekka Enberg --- mm/slub.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index be493bd63c31..d73f771d278f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4593,8 +4593,11 @@ static int sysfs_slab_add(struct kmem_cache *s) } err = sysfs_create_group(&s->kobj, &slab_attr_group); - if (err) + if (err) { + kobject_del(&s->kobj); + kobject_put(&s->kobj); return err; + } kobject_uevent(&s->kobj, KOBJ_ADD); if (!unmergeable) { /* Setup first alias */ From 8a3d271deb0cc9c2fc47317d8e431046382939c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Sep 2009 16:08:06 +0200 Subject: [PATCH 9/9] slub: fix slab_pad_check() When SLAB_POISON is used and slab_pad_check() finds an overwrite of the slab padding, we call restore_bytes() on the whole slab, not only on the padding. Acked-by: Christoph Lameer Reported-by: Zdenek Kabelac Signed-off-by: Eric Dumazet Signed-off-by: Pekka Enberg --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index d73f771d278f..b1cb2dfa109a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -655,7 +655,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); print_section("Padding", end - remainder, remainder); - restore_bytes(s, "slab padding", POISON_INUSE, start, end); + restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); return 0; }