diff --git a/include/linux/slab.h b/include/linux/slab.h index bc189a43e680..fd0ef2e16178 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -152,7 +152,7 @@ void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); void memcg_create_kmem_cache(struct mem_cgroup *, struct kmem_cache *); -void memcg_deactivate_kmem_caches(struct mem_cgroup *); +void memcg_deactivate_kmem_caches(struct mem_cgroup *, struct mem_cgroup *); /* * Please use this macro to create slab caches. Simply specify the diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fa39e51b3d94..2cb7e4e5c51a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3284,15 +3284,15 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg) */ memcg->kmem_state = KMEM_ALLOCATED; - memcg_deactivate_kmem_caches(memcg); - - kmemcg_id = memcg->kmemcg_id; - BUG_ON(kmemcg_id < 0); - parent = parent_mem_cgroup(memcg); if (!parent) parent = root_mem_cgroup; + memcg_deactivate_kmem_caches(memcg, parent); + + kmemcg_id = memcg->kmemcg_id; + BUG_ON(kmemcg_id < 0); + /* * Change kmemcg_id of this cgroup and all its descendants to the * parent's id, and then move all entries from this cgroup's list_lrus @@ -3325,7 +3325,6 @@ static void memcg_free_kmem(struct mem_cgroup *memcg) if (memcg->kmem_state == KMEM_ALLOCATED) { WARN_ON(!list_empty(&memcg->kmem_caches)); static_branch_dec(&memcg_kmem_enabled_key); - WARN_ON(page_counter_read(&memcg->kmem)); } } #else @@ -4773,6 +4772,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) /* The following stuff does not apply to the root */ if (!parent) { +#ifdef CONFIG_MEMCG_KMEM + INIT_LIST_HEAD(&memcg->kmem_caches); +#endif root_mem_cgroup = memcg; return &memcg->css; } diff --git a/mm/slab.h b/mm/slab.h index 7ead47cb9338..a62372d0f271 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -261,6 +261,9 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s) * which do not have slab_cache pointer set. * So this function assumes that the page can pass PageHead() and PageSlab() * checks. + * + * The kmem_cache can be reparented asynchronously. The caller must ensure + * the memcg lifetime, e.g. by taking rcu_read_lock() or cgroup_mutex. */ static inline struct mem_cgroup *memcg_from_slab_page(struct page *page) { @@ -268,7 +271,7 @@ static inline struct mem_cgroup *memcg_from_slab_page(struct page *page) s = READ_ONCE(page->slab_cache); if (s && !is_root_cache(s)) - return s->memcg_params.memcg; + return READ_ONCE(s->memcg_params.memcg); return NULL; } @@ -285,10 +288,22 @@ static __always_inline int memcg_charge_slab(struct page *page, struct lruvec *lruvec; int ret; - memcg = s->memcg_params.memcg; + rcu_read_lock(); + memcg = READ_ONCE(s->memcg_params.memcg); + while (memcg && !css_tryget_online(&memcg->css)) + memcg = parent_mem_cgroup(memcg); + rcu_read_unlock(); + + if (unlikely(!memcg || mem_cgroup_is_root(memcg))) { + mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), + (1 << order)); + percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order); + return 0; + } + ret = memcg_kmem_charge_memcg(page, gfp, order, memcg); if (ret) - return ret; + goto out; lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg); mod_lruvec_state(lruvec, cache_vmstat_idx(s), 1 << order); @@ -296,8 +311,9 @@ static __always_inline int memcg_charge_slab(struct page *page, /* transer try_charge() page references to kmem_cache */ percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order); css_put_many(&memcg->css, 1 << order); - - return 0; +out: + css_put(&memcg->css); + return ret; } /* @@ -310,10 +326,17 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order, struct mem_cgroup *memcg; struct lruvec *lruvec; - memcg = s->memcg_params.memcg; - lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg); - mod_lruvec_state(lruvec, cache_vmstat_idx(s), -(1 << order)); - memcg_kmem_uncharge_memcg(page, order, memcg); + rcu_read_lock(); + memcg = READ_ONCE(s->memcg_params.memcg); + if (likely(!mem_cgroup_is_root(memcg))) { + lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg); + mod_lruvec_state(lruvec, cache_vmstat_idx(s), -(1 << order)); + memcg_kmem_uncharge_memcg(page, order, memcg); + } else { + mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), + -(1 << order)); + } + rcu_read_unlock(); percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order); } diff --git a/mm/slab_common.c b/mm/slab_common.c index ee3971f7fabc..b893eefb6229 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -252,7 +252,8 @@ static void memcg_unlink_cache(struct kmem_cache *s) } else { list_del(&s->memcg_params.children_node); list_del(&s->memcg_params.kmem_caches_node); - css_put(&s->memcg_params.memcg->css); + mem_cgroup_put(s->memcg_params.memcg); + WRITE_ONCE(s->memcg_params.memcg, NULL); } } #else @@ -785,11 +786,13 @@ unlock: spin_unlock_irq(&memcg_kmem_wq_lock); } -void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) +void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg, + struct mem_cgroup *parent) { int idx; struct memcg_cache_array *arr; struct kmem_cache *s, *c; + unsigned int nr_reparented; idx = memcg_cache_id(memcg); @@ -807,6 +810,18 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) kmemcg_cache_deactivate(c); arr->entries[idx] = NULL; } + nr_reparented = 0; + list_for_each_entry(s, &memcg->kmem_caches, + memcg_params.kmem_caches_node) { + WRITE_ONCE(s->memcg_params.memcg, parent); + css_put(&memcg->css); + nr_reparented++; + } + if (nr_reparented) { + list_splice_init(&memcg->kmem_caches, + &parent->kmem_caches); + css_get_many(&parent->css, nr_reparented); + } mutex_unlock(&slab_mutex); put_online_mems();