From f1bf90724de652efdf493ea877ee050e7e6091c4 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sat, 15 Apr 2017 08:54:36 +0200 Subject: [PATCH 01/16] fs/affs: bugfix: make symbolic links work again AFFS symbolic links were broken since kernel 2.6.29 Problem was bisected to the following commit ebd09abbd969 ("vfs: ensure page symlinks are NUL-terminated") commit 035146851cfa ("vfs: introduce helper function to safely NUL-terminate symlinks") AFFS wasn't setting inode size when reading symbolic link from disk or creating a new one. Result was zero allocation in pagecache. ln -s file symlink ls -lrt file symlink -> This patch adds inode isize information on inode get and symbolic link addition. Signed-off-by: Fabian Frederick Signed-off-by: Al Viro --- fs/affs/inode.c | 1 + fs/affs/namei.c | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/affs/inode.c b/fs/affs/inode.c index abcc59899229..fd4ef3c40e40 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -140,6 +140,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino) inode->i_fop = &affs_file_operations; break; case ST_SOFTLINK: + inode->i_size = strlen((char *)AFFS_HEAD(bh)->table); inode->i_mode |= S_IFLNK; inode_nohighmem(inode); inode->i_op = &affs_symlink_inode_operations; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 96dd1d09a273..b02da4d421cc 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -365,6 +365,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) symname++; } *p = 0; + inode->i_size = i + 1; mark_buffer_dirty_inode(bh, inode); affs_brelse(bh); mark_inode_dirty(inode); From d2d58e0e0d6c750941147e505f4263239427e359 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sat, 15 Apr 2017 08:54:56 +0200 Subject: [PATCH 02/16] fs/affs: import amigaffs.h Have that file in global include/linux is not needed. Signed-off-by: Fabian Frederick Signed-off-by: Al Viro --- fs/affs/affs.h | 2 +- {include/linux => fs/affs}/amigaffs.h | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename {include/linux => fs/affs}/amigaffs.h (100%) diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 2f8bab390d13..ba26a316f46e 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include "amigaffs.h" #include #include diff --git a/include/linux/amigaffs.h b/fs/affs/amigaffs.h similarity index 100% rename from include/linux/amigaffs.h rename to fs/affs/amigaffs.h From a9d6cfb70f71e3d7fb4fe90db40f965e7096bdab Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sat, 15 Apr 2017 08:55:11 +0200 Subject: [PATCH 03/16] fs/affs: remove node generation check node generation has to be stored on disk. AFAICS we won't be able to manage it on AFFS. This patch removes relevant check in affs_nfs_get_inode() Signed-off-by: Fabian Frederick Signed-off-by: Al Viro --- fs/affs/namei.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/affs/namei.c b/fs/affs/namei.c index b02da4d421cc..d9a40b5ca4d3 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -478,11 +478,6 @@ static struct inode *affs_nfs_get_inode(struct super_block *sb, u64 ino, if (IS_ERR(inode)) return ERR_CAST(inode); - if (generation && inode->i_generation != generation) { - iput(inode); - return ERR_PTR(-ESTALE); - } - return inode; } From 077e073e8f9ebc6bdd3f3f0324b16db07147a232 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 24 Apr 2017 22:12:57 +0200 Subject: [PATCH 04/16] fs/affs: bugfix: enable writes on OFS disks We called unconditionally affs_bread_ino() with create 0 resulting in "error (device ...): get_block(): strange block request 0" when trying to write on AFFS OFS format. This patch adds create parameter to that function. 0 for affs_readpage_ofs() 1 for affs_write_begin_ofs() Bug was found here: https://bugzilla.kernel.org/show_bug.cgi?id=114961 Signed-off-by: Fabian Frederick Signed-off-by: Al Viro --- fs/affs/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/affs/file.c b/fs/affs/file.c index 0deec9cc2362..e5c5de6b85e5 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -499,7 +499,7 @@ affs_getemptyblk_ino(struct inode *inode, int block) } static int -affs_do_readpage_ofs(struct page *page, unsigned to) +affs_do_readpage_ofs(struct page *page, unsigned to, int create) { struct inode *inode = page->mapping->host; struct super_block *sb = inode->i_sb; @@ -518,7 +518,7 @@ affs_do_readpage_ofs(struct page *page, unsigned to) boff = tmp % bsize; while (pos < to) { - bh = affs_bread_ino(inode, bidx, 0); + bh = affs_bread_ino(inode, bidx, create); if (IS_ERR(bh)) return PTR_ERR(bh); tmp = min(bsize - boff, to - pos); @@ -620,7 +620,7 @@ affs_readpage_ofs(struct file *file, struct page *page) memset(page_address(page) + to, 0, PAGE_SIZE - to); } - err = affs_do_readpage_ofs(page, to); + err = affs_do_readpage_ofs(page, to, 0); if (!err) SetPageUptodate(page); unlock_page(page); @@ -657,7 +657,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping return 0; /* XXX: inefficient but safe in the face of short writes */ - err = affs_do_readpage_ofs(page, PAGE_SIZE); + err = affs_do_readpage_ofs(page, PAGE_SIZE, 1); if (err) { unlock_page(page); put_page(page); From a80f2d2224544f708b50091b18753f4b530f70c1 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 24 Apr 2017 22:13:10 +0200 Subject: [PATCH 05/16] fs/affs: bugfix: Write files greater than page size on OFS Previous AFFS patch fixed OFS write operations but unveiled another bug: files greater than 4KB are being created with a wrong size resulting in errors like the following: dd if=/dev/zero of=file bs=4097 count=1 cp file /mnt/affs/ cp: error writing '/mnt/affs/file': Bad address Signed-off-by: Fabian Frederick Signed-off-by: Al Viro --- fs/affs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/affs/file.c b/fs/affs/file.c index e5c5de6b85e5..196ee7f6fdc4 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -679,7 +679,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, int written; from = pos & (PAGE_SIZE - 1); - to = pos + len; + to = from + len; /* * XXX: not sure if this can handle short copies (len < copied), but * we don't have to, because the page should always be uptodate here, From a0c111b49bbe11c3970bc668600e3b61fbbb7fca Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 9 Apr 2017 09:32:14 +0800 Subject: [PATCH 06/16] fs: drop duplicate header percpu-rwsem.h Drop duplicate header percpu-rwsem.h from linux/fs.h. Signed-off-by: Geliang Tang Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 7251f7bb45e8..dee12c171e07 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include From cda37124f4e95ad5ccb11394a5802b0972668b32 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 25 Mar 2017 21:15:37 -0700 Subject: [PATCH 07/16] fs: constify tree_descr arrays passed to simple_fill_super() simple_fill_super() is passed an array of tree_descr structures which describe the files to create in the filesystem's root directory. Since these arrays are never modified intentionally, they should be 'const' so that they are placed in .rodata and benefit from memory protection. This patch updates the function signature and all users, and also constifies tree_descr.name. Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- drivers/infiniband/hw/qib/qib_fs.c | 2 +- drivers/xen/xenfs/super.c | 4 ++-- fs/binfmt_misc.c | 2 +- fs/debugfs/inode.c | 2 +- fs/fuse/control.c | 2 +- fs/libfs.c | 2 +- fs/nfsd/nfsctl.c | 2 +- fs/tracefs/inode.c | 2 +- include/linux/fs.h | 5 +++-- kernel/bpf/inode.c | 2 +- security/inode.c | 2 +- security/selinux/selinuxfs.c | 4 ++-- security/smack/smackfs.c | 2 +- 13 files changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index f1e66efea98a..1d940a2885c9 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -512,7 +512,7 @@ static int qibfs_fill_super(struct super_block *sb, void *data, int silent) unsigned long flags; int ret; - static struct tree_descr files[] = { + static const struct tree_descr files[] = { [2] = {"driver_stats", &driver_ops[0], S_IRUGO}, [3] = {"driver_stats_names", &driver_ops[1], S_IRUGO}, {""}, diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 328c3987b112..967f069385d0 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -44,14 +44,14 @@ static const struct file_operations capabilities_file_ops = { static int xenfs_fill_super(struct super_block *sb, void *data, int silent) { - static struct tree_descr xenfs_files[] = { + static const struct tree_descr xenfs_files[] = { [2] = { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR }, { "capabilities", &capabilities_file_ops, S_IRUGO }, { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR }, {""}, }; - static struct tree_descr xenfs_init_files[] = { + static const struct tree_descr xenfs_init_files[] = { [2] = { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR }, { "capabilities", &capabilities_file_ops, S_IRUGO }, { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR }, diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index bee1a36bc2ec..f4718098ac31 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -818,7 +818,7 @@ static const struct super_operations s_ops = { static int bm_fill_super(struct super_block *sb, void *data, int silent) { int err; - static struct tree_descr bm_files[] = { + static const struct tree_descr bm_files[] = { [2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO}, [3] = {"register", &bm_register_operations, S_IWUSR}, /* last one */ {""} diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 7fd4ec4bb214..e892ae7d89f8 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -199,7 +199,7 @@ static const struct dentry_operations debugfs_dops = { static int debug_fill_super(struct super_block *sb, void *data, int silent) { - static struct tree_descr debug_files[] = {{""}}; + static const struct tree_descr debug_files[] = {{""}}; struct debugfs_fs_info *fsi; int err; diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 6e22748b0704..b9ea99c5b5b3 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -292,7 +292,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc) static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) { - struct tree_descr empty_descr = {""}; + static const struct tree_descr empty_descr = {""}; struct fuse_conn *fc; int err; diff --git a/fs/libfs.c b/fs/libfs.c index a8b62e5d43a9..a04395334bb1 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -507,7 +507,7 @@ EXPORT_SYMBOL(simple_write_end); * to pass it an appropriate max_reserved value to avoid collisions. */ int simple_fill_super(struct super_block *s, unsigned long magic, - struct tree_descr *files) + const struct tree_descr *files) { struct inode *inode; struct dentry *root; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 8bf8f667a8cf..6493df6b1bd5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1146,7 +1146,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { - static struct tree_descr nfsd_files[] = { + static const struct tree_descr nfsd_files[] = { [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_operations, S_IRUGO}, diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 21d36d284735..328e89c2cf83 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -266,7 +266,7 @@ static const struct super_operations tracefs_super_operations = { static int trace_fill_super(struct super_block *sb, void *data, int silent) { - static struct tree_descr trace_files[] = {{""}}; + static const struct tree_descr trace_files[] = {{""}}; struct tracefs_fs_info *fsi; int err; diff --git a/include/linux/fs.h b/include/linux/fs.h index dee12c171e07..fc1b4faa6272 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2995,9 +2995,10 @@ extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; extern void make_empty_dir_inode(struct inode *inode); extern bool is_empty_dir_inode(struct inode *inode); -struct tree_descr { char *name; const struct file_operations *ops; int mode; }; +struct tree_descr { const char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); -extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); +extern int simple_fill_super(struct super_block *, unsigned long, + const struct tree_descr *); extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index fddcae801724..9bbd33497d3d 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -429,7 +429,7 @@ static int bpf_parse_options(char *data, struct bpf_mount_opts *opts) static int bpf_fill_super(struct super_block *sb, void *data, int silent) { - static struct tree_descr bpf_rfiles[] = { { "" } }; + static const struct tree_descr bpf_rfiles[] = { { "" } }; struct bpf_mount_opts opts; struct inode *inode; int ret; diff --git a/security/inode.c b/security/inode.c index 2cb14162ff8d..eccd58ef2ae8 100644 --- a/security/inode.c +++ b/security/inode.c @@ -28,7 +28,7 @@ static int mount_count; static int fill_super(struct super_block *sb, void *data, int silent) { - static struct tree_descr files[] = {{""}}; + static const struct tree_descr files[] = {{""}}; return simple_fill_super(sb, SECURITYFS_MAGIC, files); } diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index cb3fd98fb05a..6a9efedf7eb2 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1496,7 +1496,7 @@ static const struct file_operations sel_avc_cache_stats_ops = { static int sel_make_avc_files(struct dentry *dir) { int i; - static struct tree_descr files[] = { + static const struct tree_descr files[] = { { "cache_threshold", &sel_avc_cache_threshold_ops, S_IRUGO|S_IWUSR }, { "hash_stats", &sel_avc_hash_stats_ops, S_IRUGO }, @@ -1805,7 +1805,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode; struct inode_security_struct *isec; - static struct tree_descr selinux_files[] = { + static const struct tree_descr selinux_files[] = { [SEL_LOAD] = {"load", &sel_load_ops, S_IRUSR|S_IWUSR}, [SEL_ENFORCE] = {"enforce", &sel_enforce_ops, S_IRUGO|S_IWUSR}, [SEL_CONTEXT] = {"context", &transaction_ops, S_IRUGO|S_IWUGO}, diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 366b8356f75b..f6482e53d55a 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -2855,7 +2855,7 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent) int rc; struct inode *root_inode; - static struct tree_descr smack_files[] = { + static const struct tree_descr smack_files[] = { [SMK_LOAD] = { "load", &smk_load_ops, S_IRUGO|S_IWUSR}, [SMK_CIPSO] = { From 020c2833dbc76b4069c9a9886b71511052d160df Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 25 Mar 2017 21:02:18 -0700 Subject: [PATCH 08/16] fs: remove _submit_bh() _submit_bh() allowed submitting a buffer_head for I/O using custom bio_flags. It used to be used by jbd to set BIO_SNAP_STABLE, introduced by commit 713685111774 ("mm: make snapshotting pages for stable writes a per-bio operation"). However, the code and flag has since been removed and no _submit_bh() users remain. These days, bio_flags are mostly used internally by the block layer to track the state of bio's. As such, it doesn't really make sense for filesystems to use them instead of op_flags when wanting special behavior for block requests. Therefore, remove _submit_bh() and trim the bio_flags argument from submit_bh_wbc(). Cc: Darrick J. Wong Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- fs/buffer.c | 19 +++++-------------- include/linux/buffer_head.h | 2 -- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 9196f2a270da..68dc05ce06a5 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -49,7 +49,6 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, - unsigned long bio_flags, struct writeback_control *wbc); #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) @@ -1830,7 +1829,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, 0, wbc); + submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); nr_underway++; } bh = next; @@ -1884,7 +1883,7 @@ recover: struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); - submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, 0, wbc); + submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); nr_underway++; } bh = next; @@ -3095,7 +3094,7 @@ void guard_bio_eod(int op, struct bio *bio) } static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, - unsigned long bio_flags, struct writeback_control *wbc) + struct writeback_control *wbc) { struct bio *bio; @@ -3130,7 +3129,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, bio->bi_end_io = end_bio_bh_io_sync; bio->bi_private = bh; - bio->bi_flags |= bio_flags; /* Take care of bh's that straddle the end of the device */ guard_bio_eod(op, bio); @@ -3145,16 +3143,9 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, return 0; } -int _submit_bh(int op, int op_flags, struct buffer_head *bh, - unsigned long bio_flags) +int submit_bh(int op, int op_flags, struct buffer_head *bh) { - return submit_bh_wbc(op, op_flags, bh, bio_flags, NULL); -} -EXPORT_SYMBOL_GPL(_submit_bh); - -int submit_bh(int op, int op_flags, struct buffer_head *bh) -{ - return submit_bh_wbc(op, op_flags, bh, 0, NULL); + return submit_bh_wbc(op, op_flags, bh, NULL); } EXPORT_SYMBOL(submit_bh); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 79591c3660cc..bd029e52ef5e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -196,8 +196,6 @@ void ll_rw_block(int, int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, int op_flags); void write_dirty_buffer(struct buffer_head *bh, int op_flags); -int _submit_bh(int op, int op_flags, struct buffer_head *bh, - unsigned long bio_flags); int submit_bh(int, int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); From 80f18379a7c350c011d30332658aa15fe49a8fa5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Apr 2017 09:42:24 +0200 Subject: [PATCH 09/16] fs: add a VALID_OPEN_FLAGS Add a central define for all valid open flags, and use it in the uniqueness check. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/fcntl.c | 14 ++++---------- include/linux/fcntl.h | 6 ++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index be8fbe289087..de1b16bb6a29 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -742,16 +742,10 @@ static int __init fcntl_init(void) * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. */ - BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( - O_RDONLY | O_WRONLY | O_RDWR | - O_CREAT | O_EXCL | O_NOCTTY | - O_TRUNC | O_APPEND | /* O_NONBLOCK | */ - __O_SYNC | O_DSYNC | FASYNC | - O_DIRECT | O_LARGEFILE | O_DIRECTORY | - O_NOFOLLOW | O_NOATIME | O_CLOEXEC | - __FMODE_EXEC | O_PATH | __O_TMPFILE | - __FMODE_NONOTIFY - )); + BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != + HWEIGHT32( + (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | + __FMODE_EXEC | __FMODE_NONOTIFY)); fasync_cache = kmem_cache_create("fasync_cache", sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index 76ce329e656d..1b48d9c9a561 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h @@ -3,6 +3,12 @@ #include +/* list of all valid flags for the open/openat flags argument: */ +#define VALID_OPEN_FLAGS \ + (O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | \ + O_APPEND | O_NDELAY | O_NONBLOCK | O_NDELAY | __O_SYNC | O_DSYNC | \ + FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \ + O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE) #ifndef force_o_largefile #define force_o_largefile() (BITS_PER_LONG != 32) From 629e014bb8349fcf7c1e4df19a842652ece1c945 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Apr 2017 09:42:25 +0200 Subject: [PATCH 10/16] fs: completely ignore unknown open flags Currently we just stash anything we got into file->f_flags, and the report it in fcntl(F_GETFD). This patch just clears out all unknown flags so that we don't pass them to the fs or report them. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/open.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/open.c b/fs/open.c index 949cef29c3bb..7bba2b952f1e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -900,6 +900,12 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o int lookup_flags = 0; int acc_mode = ACC_MODE(flags); + /* + * Clear out all open flags we don't know about so that we don't report + * them in fcntl(F_GETFD) or similar interfaces. + */ + flags &= VALID_OPEN_FLAGS; + if (flags & (O_CREAT | __O_TMPFILE)) op->mode = (mode & S_IALLUGO) | S_IFREG; else From 0b33540f9d751b565c270d23a077ee6d9fc9e1d5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 29 Apr 2017 17:42:25 -0400 Subject: [PATCH 11/16] remove pointless extern of atime_need_update_rcu() Signed-off-by: Al Viro --- fs/internal.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index 11c6d89dce9c..7e1847dd4e3b 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -128,8 +128,6 @@ static inline bool atime_needs_update_rcu(const struct path *path, return __atime_needs_update(path, inode, true); } -extern bool atime_needs_update_rcu(const struct path *, struct inode *); - /* * fs-writeback.c */ From 9280cdd6fe5b8287a726d24cc1d558b96c8491d7 Mon Sep 17 00:00:00 2001 From: Mark Charlebois Date: Fri, 28 Apr 2017 15:15:12 -0700 Subject: [PATCH 12/16] fs: compat: Remove warning from COMPATIBLE_IOCTL cmd in COMPATIBLE_IOCTL is always a u32, so cast it so there isn't a warning about an overflow in XFORM. From: Mark Charlebois Signed-off-by: Mark Charlebois Signed-off-by: Behan Webster Signed-off-by: Matthias Kaehlcke Acked-by: Arnd Bergmann Signed-off-by: Al Viro --- fs/compat_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 11d087b2b28e..6116d5275a3e 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -833,7 +833,7 @@ static int compat_ioctl_preallocate(struct file *file, */ #define XFORM(i) (((i) ^ ((i) << 27) ^ ((i) << 17)) & 0xffffffff) -#define COMPATIBLE_IOCTL(cmd) XFORM(cmd), +#define COMPATIBLE_IOCTL(cmd) XFORM((u32)cmd), /* ioctl should not be warned about even if it's not implemented. Valid reasons to use this: - It is implemented with ->compat_ioctl on some device, but programs From 563f40019d16e5299e9edd3eac37846760637076 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 18 Apr 2017 16:04:17 -0400 Subject: [PATCH 13/16] fs: don't set *REFERENCED on single use objects By default we set DCACHE_REFERENCED and I_REFERENCED on any dentry or inode we create. This is problematic as this means that it takes two trips through the LRU for any of these objects to be reclaimed, regardless of their actual lifetime. With enough pressure from these caches we can easily evict our working set from page cache with single use objects. So instead only set *REFERENCED if we've already been added to the LRU list. This means that we've been touched since the first time we were accessed, and so more likely to need to hang out in cache. To illustrate this issue I wrote the following scripts https://github.com/josefbacik/debug-scripts/tree/master/cache-pressure on my test box. It is a single socket 4 core CPU with 16gib of RAM and I tested on an Intel 2tib NVME drive. The cache-pressure.sh script creates a new file system and creates 2 6.5gib files in order to take up 13gib of the 16gib of ram with pagecache. Then it runs a test program that reads these 2 files in a loop, and keeps track of how often it has to read bytes for each loop. On an ideal system with no pressure we should have to read 0 bytes indefinitely. The second thing this script does is start a fs_mark job that creates a ton of 0 length files, putting pressure on the system with slab only allocations. On exit the script prints out how many bytes were read by the read-file program. The results are as follows Without patch: /mnt/btrfs-test/reads/file1: total read during loops 27262988288 /mnt/btrfs-test/reads/file2: total read during loops 27262976000 With patch: /mnt/btrfs-test/reads/file2: total read during loops 18640457728 /mnt/btrfs-test/reads/file1: total read during loops 9565376512 This patch results in a 50% reduction of the amount of pages evicted from our working set. Signed-off-by: Josef Bacik Signed-off-by: Al Viro --- fs/dcache.c | 4 ++-- fs/inode.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 95d71eda8142..cddf39777835 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -419,6 +419,8 @@ static void dentry_lru_add(struct dentry *dentry) { if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) d_lru_add(dentry); + else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED))) + dentry->d_flags |= DCACHE_REFERENCED; } /** @@ -779,8 +781,6 @@ repeat: goto kill_it; } - if (!(dentry->d_flags & DCACHE_REFERENCED)) - dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); dentry->d_lockref.count--; diff --git a/fs/inode.c b/fs/inode.c index 88110fd0b282..9dfa8f16981d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -405,6 +405,8 @@ static void inode_lru_list_add(struct inode *inode) { if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_inc(nr_unused); + else + inode->i_state |= I_REFERENCED; } /* @@ -1492,7 +1494,6 @@ static void iput_final(struct inode *inode) drop = generic_drop_inode(inode); if (!drop && (sb->s_flags & MS_ACTIVE)) { - inode->i_state |= I_REFERENCED; inode_add_lru(inode); spin_unlock(&inode->i_lock); return; From deccf497d804a4c5fca2dbfad2f104675a6f9102 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 May 2017 23:30:16 +0100 Subject: [PATCH 14/16] Make stat/lstat/fstatat pass AT_NO_AUTOMOUNT to vfs_statx() stat/lstat/fstatat need to pass AT_NO_AUTOMOUNT to vfs_statx() as the pre-statx code didn't set LOOKUP_AUTOMOUNT, even though fstatat() accepted the AT_NO_AUTOMOUNT flag. Fixes: a528d35e8bfc ("statx: Add a system call to make enhanced file info available") Reported-by: Ian Kent Signed-off-by: David Howells Tested-by: Ian Kent cc: stable@vger.kernel.org Signed-off-by: Al Viro --- include/linux/fs.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index fc1b4faa6272..866c955314db 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2920,17 +2920,19 @@ extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int); static inline int vfs_stat(const char __user *filename, struct kstat *stat) { - return vfs_statx(AT_FDCWD, filename, 0, stat, STATX_BASIC_STATS); + return vfs_statx(AT_FDCWD, filename, AT_NO_AUTOMOUNT, + stat, STATX_BASIC_STATS); } static inline int vfs_lstat(const char __user *name, struct kstat *stat) { - return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW, + return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT, stat, STATX_BASIC_STATS); } static inline int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { - return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); + return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, + stat, STATX_BASIC_STATS); } static inline int vfs_fstat(int fd, struct kstat *stat) { From c6184028a75cf63036267089741ec7ef975ebc27 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 5 May 2017 20:51:42 +0200 Subject: [PATCH 15/16] fs/affs: add rename2 to prepare multiple methods Currently AFFS only supports RENAME_NOREPLACE. This patch isolates that method to a static function to prepare RENAME_EXCHANGE addition. Signed-off-by: Fabian Frederick Signed-off-by: Al Viro --- fs/affs/affs.h | 2 +- fs/affs/dir.c | 2 +- fs/affs/namei.c | 25 ++++++++++++++++--------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/fs/affs/affs.h b/fs/affs/affs.h index ba26a316f46e..773749be8290 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -173,7 +173,7 @@ extern int affs_link(struct dentry *olddentry, struct inode *dir, struct dentry *dentry); extern int affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname); -extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry, +extern int affs_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); diff --git a/fs/affs/dir.c b/fs/affs/dir.c index f1e7294381c5..591ecd7f3063 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -35,7 +35,7 @@ const struct inode_operations affs_dir_inode_operations = { .symlink = affs_symlink, .mkdir = affs_mkdir, .rmdir = affs_rmdir, - .rename = affs_rename, + .rename = affs_rename2, .setattr = affs_notify_change, }; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index d9a40b5ca4d3..49a8dc12d604 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -394,21 +394,14 @@ affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) return affs_add_entry(dir, inode, dentry, ST_LINKFILE); } -int +static int affs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags) + struct inode *new_dir, struct dentry *new_dentry) { struct super_block *sb = old_dir->i_sb; struct buffer_head *bh = NULL; int retval; - if (flags & ~RENAME_NOREPLACE) - return -EINVAL; - - pr_debug("%s(old=%lu,\"%pd\" to new=%lu,\"%pd\")\n", __func__, - old_dir->i_ino, old_dentry, new_dir->i_ino, new_dentry); - retval = affs_check_name(new_dentry->d_name.name, new_dentry->d_name.len, affs_nofilenametruncate(old_dentry)); @@ -448,6 +441,20 @@ done: return retval; } +int affs_rename2(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + + pr_debug("%s(old=%lu,\"%pd\" to new=%lu,\"%pd\")\n", __func__, + old_dir->i_ino, old_dentry, new_dir->i_ino, new_dentry); + + return affs_rename(old_dir, old_dentry, new_dir, new_dentry); +} + static struct dentry *affs_get_parent(struct dentry *child) { struct inode *parent; From 6b4657667ba06cd2e07206c65e630acdf248bfd1 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 5 May 2017 20:52:07 +0200 Subject: [PATCH 16/16] fs/affs: add rename exchange Process RENAME_EXCHANGE in affs_rename2() adding static affs_xrename() based on affs_rename(). We remove headers from respective directories then affect bh to other inode directory entries for swapping. Signed-off-by: Fabian Frederick Signed-off-by: Al Viro --- fs/affs/namei.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 49a8dc12d604..46d3ace6761d 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -441,17 +441,73 @@ done: return retval; } +static int +affs_xrename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + + struct super_block *sb = old_dir->i_sb; + struct buffer_head *bh_old = NULL; + struct buffer_head *bh_new = NULL; + int retval; + + bh_old = affs_bread(sb, d_inode(old_dentry)->i_ino); + if (!bh_old) + return -EIO; + + bh_new = affs_bread(sb, d_inode(new_dentry)->i_ino); + if (!bh_new) + return -EIO; + + /* Remove old header from its parent directory. */ + affs_lock_dir(old_dir); + retval = affs_remove_hash(old_dir, bh_old); + affs_unlock_dir(old_dir); + if (retval) + goto done; + + /* Remove new header from its parent directory. */ + affs_lock_dir(new_dir); + retval = affs_remove_hash(new_dir, bh_new); + affs_unlock_dir(new_dir); + if (retval) + goto done; + + /* Insert old into the new directory with the new name. */ + affs_copy_name(AFFS_TAIL(sb, bh_old)->name, new_dentry); + affs_fix_checksum(sb, bh_old); + affs_lock_dir(new_dir); + retval = affs_insert_hash(new_dir, bh_old); + affs_unlock_dir(new_dir); + + /* Insert new into the old directory with the old name. */ + affs_copy_name(AFFS_TAIL(sb, bh_new)->name, old_dentry); + affs_fix_checksum(sb, bh_new); + affs_lock_dir(old_dir); + retval = affs_insert_hash(old_dir, bh_new); + affs_unlock_dir(old_dir); +done: + mark_buffer_dirty_inode(bh_old, new_dir); + mark_buffer_dirty_inode(bh_new, old_dir); + affs_brelse(bh_old); + affs_brelse(bh_new); + return retval; +} + int affs_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { - if (flags & ~RENAME_NOREPLACE) + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) return -EINVAL; pr_debug("%s(old=%lu,\"%pd\" to new=%lu,\"%pd\")\n", __func__, old_dir->i_ino, old_dentry, new_dir->i_ino, new_dentry); + if (flags & RENAME_EXCHANGE) + return affs_xrename(old_dir, old_dentry, new_dir, new_dentry); + return affs_rename(old_dir, old_dentry, new_dir, new_dentry); }