alistair23-linux/fs/internal.h
Vladimir Davydov 503c358cf1 list_lru: introduce list_lru_shrink_{count,walk}
Kmem accounting of memcg is unusable now, because it lacks slab shrinker
support.  That means when we hit the limit we will get ENOMEM w/o any
chance to recover.  What we should do then is to call shrink_slab, which
would reclaim old inode/dentry caches from this cgroup.  This is what
this patch set is intended to do.

Basically, it does two things.  First, it introduces the notion of
per-memcg slab shrinker.  A shrinker that wants to reclaim objects per
cgroup should mark itself as SHRINKER_MEMCG_AWARE.  Then it will be
passed the memory cgroup to scan from in shrink_control->memcg.  For
such shrinkers shrink_slab iterates over the whole cgroup subtree under
the target cgroup and calls the shrinker for each kmem-active memory
cgroup.

Secondly, this patch set makes the list_lru structure per-memcg.  It's
done transparently to list_lru users - everything they have to do is to
tell list_lru_init that they want memcg-aware list_lru.  Then the
list_lru will automatically distribute objects among per-memcg lists
basing on which cgroup the object is accounted to.  This way to make FS
shrinkers (icache, dcache) memcg-aware we only need to make them use
memcg-aware list_lru, and this is what this patch set does.

As before, this patch set only enables per-memcg kmem reclaim when the
pressure goes from memory.limit, not from memory.kmem.limit.  Handling
memory.kmem.limit is going to be tricky due to GFP_NOFS allocations, and
it is still unclear whether we will have this knob in the unified
hierarchy.

This patch (of 9):

NUMA aware slab shrinkers use the list_lru structure to distribute
objects coming from different NUMA nodes to different lists.  Whenever
such a shrinker needs to count or scan objects from a particular node,
it issues commands like this:

        count = list_lru_count_node(lru, sc->nid);
        freed = list_lru_walk_node(lru, sc->nid, isolate_func,
                                   isolate_arg, &sc->nr_to_scan);

where sc is an instance of the shrink_control structure passed to it
from vmscan.

To simplify this, let's add special list_lru functions to be used by
shrinkers, list_lru_shrink_count() and list_lru_shrink_walk(), which
consolidate the nid and nr_to_scan arguments in the shrink_control
structure.

This will also allow us to avoid patching shrinkers that use list_lru
when we make shrink_slab() per-memcg - all we will have to do is extend
the shrink_control structure to include the target memcg and make
list_lru_shrink_{count,walk} handle this appropriately.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Suggested-by: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-02-12 18:54:08 -08:00

154 lines
3.6 KiB
C

/* fs/ internal definitions
*
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
struct super_block;
struct file_system_type;
struct linux_binprm;
struct path;
struct mount;
struct shrink_control;
/*
* block_dev.c
*/
#ifdef CONFIG_BLOCK
extern void __init bdev_cache_init(void);
extern int __sync_blockdev(struct block_device *bdev, int wait);
#else
static inline void bdev_cache_init(void)
{
}
static inline int __sync_blockdev(struct block_device *bdev, int wait)
{
return 0;
}
#endif
/*
* buffer.c
*/
extern void guard_bio_eod(int rw, struct bio *bio);
/*
* char_dev.c
*/
extern void __init chrdev_init(void);
/*
* namei.c
*/
extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
/*
* namespace.c
*/
extern int copy_mount_options(const void __user *, unsigned long *);
extern char *copy_mount_string(const void __user *);
extern struct vfsmount *lookup_mnt(struct path *);
extern int finish_automount(struct vfsmount *, struct path *);
extern int sb_prepare_remount_readonly(struct super_block *);
extern void __init mnt_init(void);
extern int __mnt_want_write(struct vfsmount *);
extern int __mnt_want_write_file(struct file *);
extern void __mnt_drop_write(struct vfsmount *);
extern void __mnt_drop_write_file(struct file *);
/*
* fs_struct.c
*/
extern void chroot_fs_refs(const struct path *, const struct path *);
/*
* file_table.c
*/
extern struct file *get_empty_filp(void);
/*
* super.c
*/
extern int do_remount_sb(struct super_block *, int, void *, int);
extern bool grab_super_passive(struct super_block *sb);
extern struct dentry *mount_fs(struct file_system_type *,
int, const char *, void *);
extern struct super_block *user_get_super(dev_t);
/*
* open.c
*/
struct open_flags {
int open_flag;
umode_t mode;
int acc_mode;
int intent;
int lookup_flags;
};
extern struct file *do_filp_open(int dfd, struct filename *pathname,
const struct open_flags *op);
extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
const char *, const struct open_flags *);
extern long do_handle_open(int mountdirfd,
struct file_handle __user *ufh, int open_flag);
extern int open_check_o_direct(struct file *f);
/*
* inode.c
*/
extern spinlock_t inode_sb_list_lock;
extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
extern void inode_add_lru(struct inode *inode);
/*
* fs-writeback.c
*/
extern void inode_wb_list_del(struct inode *inode);
extern long get_nr_dirty_inodes(void);
extern void evict_inodes(struct super_block *);
extern int invalidate_inodes(struct super_block *, bool);
/*
* dcache.c
*/
extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
extern int d_set_mounted(struct dentry *dentry);
extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
/*
* read_write.c
*/
extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
/*
* pipe.c
*/
extern const struct file_operations pipefifo_fops;
/*
* fs_pin.c
*/
extern void sb_pin_kill(struct super_block *sb);
extern void mnt_pin_kill(struct mount *m);
/*
* fs/nsfs.c
*/
extern struct dentry_operations ns_dentry_operations;