From 1e6cb72399fd58b38a1c11055ef18fe01f535cda Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 1 Sep 2018 10:41:11 +0300 Subject: [PATCH 01/13] fsnotify: add super block object type Add the infrastructure to attach a mark to a super_block struct and detach all attached marks when super block is destroyed. This is going to be used by fanotify backend to setup super block marks. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fdinfo.c | 5 +++++ fs/notify/fsnotify.c | 8 +++++++- fs/notify/fsnotify.h | 11 +++++++++++ fs/notify/mark.c | 4 ++++ fs/super.c | 2 +- include/linux/fs.h | 5 +++++ include/linux/fsnotify_backend.h | 17 ++++++++++++++--- 7 files changed, 47 insertions(+), 5 deletions(-) diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 86fcf5814279..25385e336ac7 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -131,6 +131,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", mnt->mnt_id, mflags, mark->mask, mark->ignored_mask); + } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) { + struct super_block *sb = fsnotify_conn_sb(mark->connector); + + seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n", + sb->s_dev, mflags, mark->mask, mark->ignored_mask); } } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index ababdbfab537..2971803d151c 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -48,7 +48,7 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) * Called during unmount with no locks held, so needs to be safe against * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. */ -void fsnotify_unmount_inodes(struct super_block *sb) +static void fsnotify_unmount_inodes(struct super_block *sb) { struct inode *inode, *iput_inode = NULL; @@ -98,6 +98,12 @@ void fsnotify_unmount_inodes(struct super_block *sb) iput(iput_inode); } +void fsnotify_sb_delete(struct super_block *sb) +{ + fsnotify_unmount_inodes(sb); + fsnotify_clear_marks_by_sb(sb); +} + /* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 7902653dd577..5a00121fb219 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -21,6 +21,12 @@ static inline struct mount *fsnotify_conn_mount( return container_of(conn->obj, struct mount, mnt_fsnotify_marks); } +static inline struct super_block *fsnotify_conn_sb( + struct fsnotify_mark_connector *conn) +{ + return container_of(conn->obj, struct super_block, s_fsnotify_marks); +} + /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); @@ -43,6 +49,11 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) { fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks); } +/* run the list of all marks associated with sb and destroy them */ +static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) +{ + fsnotify_destroy_marks(&sb->s_fsnotify_marks); +} /* Wait until all marks queued for destruction are destroyed */ extern void fsnotify_wait_marks_destroyed(void); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 59cdb27826de..b5172ccb2e60 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -115,6 +115,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) return &fsnotify_conn_inode(conn)->i_fsnotify_mask; else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask; + else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) + return &fsnotify_conn_sb(conn)->s_fsnotify_mask; return NULL; } @@ -192,6 +194,8 @@ static struct inode *fsnotify_detach_connector_from_object( inode->i_fsnotify_mask = 0; } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; + } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { + fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; } rcu_assign_pointer(*(conn->obj), NULL); diff --git a/fs/super.c b/fs/super.c index f3a8c008e164..ca53a08497ed 100644 --- a/fs/super.c +++ b/fs/super.c @@ -442,7 +442,7 @@ void generic_shutdown_super(struct super_block *sb) sync_filesystem(sb); sb->s_flags &= ~SB_ACTIVE; - fsnotify_unmount_inodes(sb); + fsnotify_sb_delete(sb); cgroup_writeback_umount(); evict_inodes(sb); diff --git a/include/linux/fs.h b/include/linux/fs.h index 33322702c910..2c14801d0aa3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1464,6 +1464,11 @@ struct super_block { spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ + +#ifdef CONFIG_FSNOTIFY + __u32 s_fsnotify_mask; + struct fsnotify_mark_connector __rcu *s_fsnotify_marks; +#endif } __randomize_layout; /* Helper functions so that in most cases filesystems will diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index b8f4182f42f1..81b88fc9df31 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -206,12 +206,14 @@ struct fsnotify_group { enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, FSNOTIFY_OBJ_TYPE_VFSMOUNT, + FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; #define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE) #define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT) +#define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB) #define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1) static inline bool fsnotify_valid_obj_type(unsigned int type) @@ -255,6 +257,7 @@ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ FSNOTIFY_ITER_FUNCS(inode, INODE) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) +FSNOTIFY_ITER_FUNCS(sb, SB) #define fsnotify_foreach_obj_type(type) \ for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++) @@ -267,8 +270,8 @@ struct fsnotify_mark_connector; typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; /* - * Inode / vfsmount point to this structure which tracks all marks attached to - * the inode / vfsmount. The reference to inode / vfsmount is held by this + * Inode/vfsmount/sb point to this structure which tracks all marks attached to + * the inode/vfsmount/sb. The reference to inode/vfsmount/sb is held by this * structure. We destroy this structure when there are no more marks attached * to it. The structure is protected by fsnotify_mark_srcu. */ @@ -335,6 +338,7 @@ extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int dat extern int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask); extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); +extern void fsnotify_sb_delete(struct super_block *sb); extern u32 fsnotify_get_cookie(void); static inline int fsnotify_inode_watches_children(struct inode *inode) @@ -455,9 +459,13 @@ static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *gr { fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE_FL); } +/* run all the marks in a group, and clear all of the sn marks */ +static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) +{ + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB_FL); +} extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); -extern void fsnotify_unmount_inodes(struct super_block *sb); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); @@ -484,6 +492,9 @@ static inline void __fsnotify_inode_delete(struct inode *inode) static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) {} +static inline void fsnotify_sb_delete(struct super_block *sb) +{} + static inline void fsnotify_update_flags(struct dentry *dentry) {} From 60f7ed8c7c4d06aeda448c6da74621552ee739aa Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 1 Sep 2018 10:41:12 +0300 Subject: [PATCH 02/13] fsnotify: send path type events to group with super block marks Send events to group if super block mark mask matches the event and unless the same group has an ignore mask on the vfsmount or the inode on which the event occurred. Soon, fanotify backend is going to support super block marks and fanotify backend only supports path type events. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 2971803d151c..422fbc6dffde 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -325,15 +325,18 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, const unsigned char *file_name, u32 cookie) { struct fsnotify_iter_info iter_info = {}; - struct mount *mnt; + struct super_block *sb = NULL; + struct mount *mnt = NULL; + __u32 mnt_or_sb_mask = 0; int ret = 0; /* global tests shouldn't care about events on child only the specific event */ __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); - if (data_is == FSNOTIFY_EVENT_PATH) + if (data_is == FSNOTIFY_EVENT_PATH) { mnt = real_mount(((const struct path *)data)->mnt); - else - mnt = NULL; + sb = mnt->mnt.mnt_sb; + mnt_or_sb_mask = mnt->mnt_fsnotify_mask | sb->s_fsnotify_mask; + } /* * Optimization: srcu_read_lock() has a memory barrier which can @@ -343,16 +346,15 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * need SRCU to keep them "alive". */ if (!to_tell->i_fsnotify_marks && - (!mnt || !mnt->mnt_fsnotify_marks)) + (!mnt || (!mnt->mnt_fsnotify_marks && !sb->s_fsnotify_marks))) return 0; /* * if this is a modify event we may need to clear the ignored masks - * otherwise return if neither the inode nor the vfsmount care about + * otherwise return if neither the inode nor the vfsmount/sb care about * this type of event. */ if (!(mask & FS_MODIFY) && - !(test_mask & to_tell->i_fsnotify_mask) && - !(mnt && test_mask & mnt->mnt_fsnotify_mask)) + !(test_mask & (to_tell->i_fsnotify_mask | mnt_or_sb_mask))) return 0; iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); @@ -362,11 +364,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, if (mnt) { iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); + iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] = + fsnotify_first_mark(&sb->s_fsnotify_marks); } /* - * We need to merge inode & vfsmount mark lists so that inode mark - * ignore masks are properly reflected for mount mark notifications. + * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark + * ignore masks are properly reflected for mount/sb mark notifications. * That's why this traversal is so complicated... */ while (fsnotify_iter_select_report_types(&iter_info)) { From d54f4fba889b205e9cd8239182ca5d27d0ac3bc2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 1 Sep 2018 10:41:13 +0300 Subject: [PATCH 03/13] fanotify: add API to attach/detach super block mark Add another mark type flag FAN_MARK_FILESYSTEM for add/remove/flush of super block mark type. A super block watch gets all events on the filesystem, regardless of the mount from which the mark was added, unless an ignore mask exists on either the inode or the mount where the event was generated. Only one of FAN_MARK_MOUNT and FAN_MARK_FILESYSTEM mark type flags may be provided to fanotify_mark() or no mark type flag for inode mark. Cc: Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 42 ++++++++++++++++++++++++++---- include/uapi/linux/fanotify.h | 16 +++++++++--- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 69054886915b..1347c588f778 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -563,6 +563,13 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, mask, flags); } +static int fanotify_remove_sb_mark(struct fsnotify_group *group, + struct super_block *sb, __u32 mask, + unsigned int flags) +{ + return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, flags); +} + static int fanotify_remove_inode_mark(struct fsnotify_group *group, struct inode *inode, __u32 mask, unsigned int flags) @@ -658,6 +665,14 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags); } +static int fanotify_add_sb_mark(struct fsnotify_group *group, + struct super_block *sb, __u32 mask, + unsigned int flags) +{ + return fanotify_add_mark(group, &sb->s_fsnotify_marks, + FSNOTIFY_OBJ_TYPE_SB, mask, flags); +} + static int fanotify_add_inode_mark(struct fsnotify_group *group, struct inode *inode, __u32 mask, unsigned int flags) @@ -806,6 +821,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fd f; struct path path; u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD; + unsigned int mark_type = flags & FAN_MARK_TYPE_MASK; int ret; pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", @@ -817,6 +833,16 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (flags & ~FAN_ALL_MARK_FLAGS) return -EINVAL; + + switch (mark_type) { + case FAN_MARK_INODE: + case FAN_MARK_MOUNT: + case FAN_MARK_FILESYSTEM: + break; + default: + return -EINVAL; + } + switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { case FAN_MARK_ADD: /* fallthrough */ case FAN_MARK_REMOVE: @@ -824,7 +850,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; break; case FAN_MARK_FLUSH: - if (flags & ~(FAN_MARK_MOUNT | FAN_MARK_FLUSH)) + if (flags & ~(FAN_MARK_TYPE_MASK | FAN_MARK_FLUSH)) return -EINVAL; break; default: @@ -863,8 +889,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (flags & FAN_MARK_FLUSH) { ret = 0; - if (flags & FAN_MARK_MOUNT) + if (mark_type == FAN_MARK_MOUNT) fsnotify_clear_vfsmount_marks_by_group(group); + else if (mark_type == FAN_MARK_FILESYSTEM) + fsnotify_clear_sb_marks_by_group(group); else fsnotify_clear_inode_marks_by_group(group); goto fput_and_out; @@ -875,7 +903,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, goto fput_and_out; /* inode held in place by reference to path; group by fget on fd */ - if (!(flags & FAN_MARK_MOUNT)) + if (mark_type == FAN_MARK_INODE) inode = path.dentry->d_inode; else mnt = path.mnt; @@ -883,14 +911,18 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, /* create/update an inode mark */ switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { case FAN_MARK_ADD: - if (flags & FAN_MARK_MOUNT) + if (mark_type == FAN_MARK_MOUNT) ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags); + else if (mark_type == FAN_MARK_FILESYSTEM) + ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, flags); else ret = fanotify_add_inode_mark(group, inode, mask, flags); break; case FAN_MARK_REMOVE: - if (flags & FAN_MARK_MOUNT) + if (mark_type == FAN_MARK_MOUNT) ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags); + else if (mark_type == FAN_MARK_FILESYSTEM) + ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, flags); else ret = fanotify_remove_inode_mark(group, inode, mask, flags); break; diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 74247917de04..ad81234d1919 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -27,7 +27,7 @@ #define FAN_CLOEXEC 0x00000001 #define FAN_NONBLOCK 0x00000002 -/* These are NOT bitwise flags. Both bits are used togther. */ +/* These are NOT bitwise flags. Both bits are used together. */ #define FAN_CLASS_NOTIF 0x00000000 #define FAN_CLASS_CONTENT 0x00000004 #define FAN_CLASS_PRE_CONTENT 0x00000008 @@ -47,19 +47,27 @@ #define FAN_MARK_REMOVE 0x00000002 #define FAN_MARK_DONT_FOLLOW 0x00000004 #define FAN_MARK_ONLYDIR 0x00000008 -#define FAN_MARK_MOUNT 0x00000010 +/* FAN_MARK_MOUNT is 0x00000010 */ #define FAN_MARK_IGNORED_MASK 0x00000020 #define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 #define FAN_MARK_FLUSH 0x00000080 +/* FAN_MARK_FILESYSTEM is 0x00000100 */ + +/* These are NOT bitwise flags. Both bits can be used togther. */ +#define FAN_MARK_INODE 0x00000000 +#define FAN_MARK_MOUNT 0x00000010 +#define FAN_MARK_FILESYSTEM 0x00000100 +#define FAN_MARK_TYPE_MASK (FAN_MARK_INODE | FAN_MARK_MOUNT | \ + FAN_MARK_FILESYSTEM) #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ FAN_MARK_DONT_FOLLOW |\ FAN_MARK_ONLYDIR |\ - FAN_MARK_MOUNT |\ FAN_MARK_IGNORED_MASK |\ FAN_MARK_IGNORED_SURV_MODIFY |\ - FAN_MARK_FLUSH) + FAN_MARK_FLUSH|\ + FAN_MARK_TYPE_MASK) /* * All of the events - we build the list by hand so that we can add flags in From 96a71f21ef1fcc32bea07c612a332a89a213f054 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 21 Sep 2018 21:20:30 +0300 Subject: [PATCH 04/13] fanotify: store fanotify_init() flags in group's fanotify_data This averts the need to re-generate flags in fanotify_show_fdinfo() and sets the scene for addition of more upcoming flags without growing new members to the fanotify_data struct. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 8 ++++---- fs/notify/fdinfo.c | 24 +----------------------- include/linux/fanotify.h | 4 ++++ include/linux/fsnotify_backend.h | 4 ++-- 4 files changed, 11 insertions(+), 29 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 1347c588f778..15719d4aa4b5 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -191,7 +191,7 @@ static int process_access_response(struct fsnotify_group *group, if (fd < 0) return -EINVAL; - if ((response & FAN_AUDIT) && !group->fanotify_data.audit) + if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT)) return -EINVAL; event = dequeue_event(group, fd); @@ -701,8 +701,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) struct user_struct *user; struct fanotify_event_info *oevent; - pr_debug("%s: flags=%d event_f_flags=%d\n", - __func__, flags, event_f_flags); + pr_debug("%s: flags=%x event_f_flags=%x\n", + __func__, flags, event_f_flags); if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -746,6 +746,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) } group->fanotify_data.user = user; + group->fanotify_data.flags = flags; atomic_inc(&user->fanotify_listeners); group->memcg = get_mem_cgroup_from_mm(current->mm); @@ -798,7 +799,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) fd = -EPERM; if (!capable(CAP_AUDIT_WRITE)) goto out_destroy_group; - group->fanotify_data.audit = true; } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 25385e336ac7..348a184bcdda 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -142,31 +142,9 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) void fanotify_show_fdinfo(struct seq_file *m, struct file *f) { struct fsnotify_group *group = f->private_data; - unsigned int flags = 0; - - switch (group->priority) { - case FS_PRIO_0: - flags |= FAN_CLASS_NOTIF; - break; - case FS_PRIO_1: - flags |= FAN_CLASS_CONTENT; - break; - case FS_PRIO_2: - flags |= FAN_CLASS_PRE_CONTENT; - break; - } - - if (group->max_events == UINT_MAX) - flags |= FAN_UNLIMITED_QUEUE; - - if (group->fanotify_data.max_marks == UINT_MAX) - flags |= FAN_UNLIMITED_MARKS; - - if (group->fanotify_data.audit) - flags |= FAN_ENABLE_AUDIT; seq_printf(m, "fanotify flags:%x event-flags:%x\n", - flags, group->fanotify_data.f_flags); + group->fanotify_data.flags, group->fanotify_data.f_flags); show_fdinfo(m, f, fanotify_fdinfo); } diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 096c96f4f16a..9c5ea3bdfaa0 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -6,4 +6,8 @@ /* not valid from userspace, only kernel internal */ #define FAN_MARK_ONDIR 0x00000100 + +#define FAN_GROUP_FLAG(group, flag) \ + ((group)->fanotify_data.flags & (flag)) + #endif /* _LINUX_FANOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 81b88fc9df31..8e91341cbd8a 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -189,10 +189,10 @@ struct fsnotify_group { /* allows a group to block waiting for a userspace response */ struct list_head access_list; wait_queue_head_t access_waitq; - int f_flags; + int flags; /* flags from fanotify_init() */ + int f_flags; /* event_f_flags from fanotify_init() */ unsigned int max_marks; struct user_struct *user; - bool audit; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; From b723a7911d028fb55f67a63c4c4d895f72e059d4 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Oct 2018 00:25:32 +0300 Subject: [PATCH 05/13] fanotify: fix collision of internal and uapi mark flags The new mark flag FAN_MARK_FILESYSTEMS collides with existing internal flag FAN_MARK_ONDIR. Change internal flag value to avoid the collision. Fixes: d54f4fba889b ("fanotify: add API to attach/detach super block mark") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 9c5ea3bdfaa0..e70fccc3757e 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -5,7 +5,7 @@ #include /* not valid from userspace, only kernel internal */ -#define FAN_MARK_ONDIR 0x00000100 +#define FAN_MARK_ONDIR 0x80000000 #define FAN_GROUP_FLAG(group, flag) \ ((group)->fanotify_data.flags & (flag)) From 007d1e8395eaa59b0e7ad9eb2b53a40859446a88 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Oct 2018 00:25:33 +0300 Subject: [PATCH 06/13] fsnotify: generalize handling of extra event flags FS_EVENT_ON_CHILD gets a special treatment in fsnotify() because it is not a flag specifying an event type, but rather an extra flags that may be reported along with another event and control the handling of the event by the backend. FS_ISDIR is also an "extra flag" and not an "event type" and therefore desrves the same treatment. With inotify/dnotify backends it was never possible to set FS_ISDIR in mark masks, so it did not matter. With fanotify backend, mark adding code jumps through hoops to avoid setting the FS_ISDIR in the commulative object mask. Separate the constant ALL_FSNOTIFY_EVENTS to ALL_FSNOTIFY_FLAGS and ALL_FSNOTIFY_EVENTS, so the latter can be used to test for specific event types. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 7 +++---- include/linux/fsnotify_backend.h | 9 +++++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 422fbc6dffde..7391a02bf723 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -196,7 +196,7 @@ static int send_to_group(struct inode *to_tell, struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; - __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); + __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); __u32 marks_mask = 0; __u32 marks_ignored_mask = 0; struct fsnotify_mark *mark; @@ -329,8 +329,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, struct mount *mnt = NULL; __u32 mnt_or_sb_mask = 0; int ret = 0; - /* global tests shouldn't care about events on child only the specific event */ - __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); + __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); if (data_is == FSNOTIFY_EVENT_PATH) { mnt = real_mount(((const struct path *)data)->mnt); @@ -396,7 +395,7 @@ static __init int fsnotify_init(void) { int ret; - BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23); + BUG_ON(hweight32(ALL_FSNOTIFY_BITS) != 23); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 8e91341cbd8a..135b973e44d1 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -68,15 +68,20 @@ #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM) +/* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \ FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ - FS_OPEN_PERM | FS_ACCESS_PERM | FS_EXCL_UNLINK | \ - FS_ISDIR | FS_IN_ONESHOT | FS_DN_RENAME | \ + FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME) + +/* Extra flags that may be reported with event or control handling of events */ +#define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ FS_DN_MULTISHOT | FS_EVENT_ON_CHILD) +#define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS) + struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark; From a72fd224e37bf6a0630bce302deebbccbc236ba2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Oct 2018 00:25:34 +0300 Subject: [PATCH 07/13] fanotify: simplify handling of FAN_ONDIR fanotify mark add/remove code jumps through hoops to avoid setting the FS_ISDIR in the commulative object mask. That was just papering over a bug in fsnotify() handling of the FS_ISDIR extra flag. This bug is now fixed, so all the hoops can be removed along with the unneeded internal flag FAN_MARK_ONDIR. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 32 +++++------------------------- include/linux/fanotify.h | 3 --- 2 files changed, 5 insertions(+), 30 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 15719d4aa4b5..34b511407035 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -506,18 +506,10 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, spin_lock(&fsn_mark->lock); if (!(flags & FAN_MARK_IGNORED_MASK)) { - __u32 tmask = fsn_mark->mask & ~mask; - - if (flags & FAN_MARK_ONDIR) - tmask &= ~FAN_ONDIR; - oldmask = fsn_mark->mask; - fsn_mark->mask = tmask; + fsn_mark->mask &= ~mask; } else { - __u32 tmask = fsn_mark->ignored_mask & ~mask; - if (flags & FAN_MARK_ONDIR) - tmask &= ~FAN_ONDIR; - fsn_mark->ignored_mask = tmask; + fsn_mark->ignored_mask &= ~mask; } *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask); spin_unlock(&fsn_mark->lock); @@ -586,19 +578,10 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, spin_lock(&fsn_mark->lock); if (!(flags & FAN_MARK_IGNORED_MASK)) { - __u32 tmask = fsn_mark->mask | mask; - - if (flags & FAN_MARK_ONDIR) - tmask |= FAN_ONDIR; - oldmask = fsn_mark->mask; - fsn_mark->mask = tmask; + fsn_mark->mask |= mask; } else { - __u32 tmask = fsn_mark->ignored_mask | mask; - if (flags & FAN_MARK_ONDIR) - tmask |= FAN_ONDIR; - - fsn_mark->ignored_mask = tmask; + fsn_mark->ignored_mask |= mask; if (flags & FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } @@ -820,7 +803,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct fd f; struct path path; - u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD; + u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD | FAN_ONDIR; unsigned int mark_type = flags & FAN_MARK_TYPE_MASK; int ret; @@ -857,11 +840,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; } - if (mask & FAN_ONDIR) { - flags |= FAN_MARK_ONDIR; - mask &= ~FAN_ONDIR; - } - if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) valid_mask |= FAN_ALL_PERM_EVENTS; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index e70fccc3757e..a8c3fc54276d 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -4,9 +4,6 @@ #include -/* not valid from userspace, only kernel internal */ -#define FAN_MARK_ONDIR 0x80000000 - #define FAN_GROUP_FLAG(group, flag) \ ((group)->fanotify_data.flags & (flag)) From 23c9deeb3285d34fd243abb3d6b9f07db60c3cf4 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Oct 2018 00:25:35 +0300 Subject: [PATCH 08/13] fanotify: deprecate uapi FAN_ALL_* constants We do not want to add new bits to the FAN_ALL_* uapi constants because they have been exposed to userspace. If there are programs out there using these constants, those programs could break if re-compiled with modified FAN_ALL_* constants and run on an old kernel. We deprecate the uapi constants FAN_ALL_* and define new FANOTIFY_* constants for internal use to replace them. New feature bits will be added only to the new constants. Cc: Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 6 ++-- fs/notify/fanotify/fanotify.h | 2 +- fs/notify/fanotify/fanotify_user.c | 22 +++++++------- include/linux/fanotify.h | 47 ++++++++++++++++++++++++++++++ include/uapi/linux/fanotify.h | 18 ++++++------ 5 files changed, 71 insertions(+), 24 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 94b52157bf8d..03498eb995be 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -131,8 +131,8 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info, !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) return false; - if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask & - ~marks_ignored_mask) + if (event_mask & FANOTIFY_OUTGOING_EVENTS & + marks_mask & ~marks_ignored_mask) return true; return false; @@ -236,7 +236,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, ret = fsnotify_add_event(group, fsn_event, fanotify_merge); if (ret) { /* Permission events shouldn't be merged */ - BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS); + BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 8609ba06f474..88a8290a61cb 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -44,7 +44,7 @@ FANOTIFY_PE(struct fsnotify_event *fse) static inline bool fanotify_is_perm_event(u32 mask) { return IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS) && - mask & FAN_ALL_PERM_EVENTS; + mask & FANOTIFY_PERM_EVENTS; } static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 34b511407035..530e5e486105 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -131,7 +131,7 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; metadata->reserved = 0; - metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS; + metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->tgid); if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW)) metadata->fd = FAN_NOFD; @@ -395,7 +395,7 @@ static int fanotify_release(struct inode *ignored, struct file *file) */ while (!fsnotify_notify_queue_is_empty(group)) { fsn_event = fsnotify_remove_first_event(group); - if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) { + if (!(fsn_event->mask & FANOTIFY_PERM_EVENTS)) { spin_unlock(&group->notification_lock); fsnotify_destroy_event(group, fsn_event); spin_lock(&group->notification_lock); @@ -691,9 +691,9 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) return -EPERM; #ifdef CONFIG_AUDITSYSCALL - if (flags & ~(FAN_ALL_INIT_FLAGS | FAN_ENABLE_AUDIT)) + if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT)) #else - if (flags & ~FAN_ALL_INIT_FLAGS) + if (flags & ~FANOTIFY_INIT_FLAGS) #endif return -EINVAL; @@ -745,7 +745,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.f_flags = event_f_flags; init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); - switch (flags & FAN_ALL_CLASS_BITS) { + switch (flags & FANOTIFY_CLASS_BITS) { case FAN_CLASS_NOTIF: group->priority = FS_PRIO_0; break; @@ -803,8 +803,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct fd f; struct path path; - u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD | FAN_ONDIR; - unsigned int mark_type = flags & FAN_MARK_TYPE_MASK; + u32 valid_mask = FANOTIFY_EVENTS | FAN_EVENT_ON_CHILD | FAN_ONDIR; + unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; int ret; pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", @@ -814,7 +814,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (mask & ((__u64)0xffffffff << 32)) return -EINVAL; - if (flags & ~FAN_ALL_MARK_FLAGS) + if (flags & ~FANOTIFY_MARK_FLAGS) return -EINVAL; switch (mark_type) { @@ -833,7 +833,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; break; case FAN_MARK_FLUSH: - if (flags & ~(FAN_MARK_TYPE_MASK | FAN_MARK_FLUSH)) + if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH)) return -EINVAL; break; default: @@ -841,7 +841,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, } if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) - valid_mask |= FAN_ALL_PERM_EVENTS; + valid_mask |= FANOTIFY_PERM_EVENTS; if (mask & ~valid_mask) return -EINVAL; @@ -861,7 +861,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, * allowed to set permissions events. */ ret = -EINVAL; - if (mask & FAN_ALL_PERM_EVENTS && + if (mask & FANOTIFY_PERM_EVENTS && group->priority == FS_PRIO_0) goto fput_and_out; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index a8c3fc54276d..4519b0988afe 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -7,4 +7,51 @@ #define FAN_GROUP_FLAG(group, flag) \ ((group)->fanotify_data.flags & (flag)) +/* + * Flags allowed to be passed from/to userspace. + * + * We intentionally do not add new bits to the old FAN_ALL_* constants, because + * they are uapi exposed constants. If there are programs out there using + * these constant, the programs may break if re-compiled with new uapi headers + * and then run on an old kernel. + */ +#define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + FAN_CLASS_PRE_CONTENT) + +#define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | \ + FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS) + +#define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ + FAN_MARK_FILESYSTEM) + +#define FANOTIFY_MARK_FLAGS (FANOTIFY_MARK_TYPE_BITS | \ + FAN_MARK_ADD | \ + FAN_MARK_REMOVE | \ + FAN_MARK_DONT_FOLLOW | \ + FAN_MARK_ONLYDIR | \ + FAN_MARK_IGNORED_MASK | \ + FAN_MARK_IGNORED_SURV_MODIFY | \ + FAN_MARK_FLUSH) + +/* Events that user can request to be notified on */ +#define FANOTIFY_EVENTS (FAN_ACCESS | FAN_MODIFY | \ + FAN_CLOSE | FAN_OPEN) + +/* Events that require a permission response from user */ +#define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM) + +/* Events that may be reported to user */ +#define FANOTIFY_OUTGOING_EVENTS (FANOTIFY_EVENTS | \ + FANOTIFY_PERM_EVENTS | \ + FAN_Q_OVERFLOW) + +/* Do not use these old uapi constants internally */ +#undef FAN_ALL_CLASS_BITS +#undef FAN_ALL_INIT_FLAGS +#undef FAN_ALL_MARK_FLAGS +#undef FAN_ALL_EVENTS +#undef FAN_ALL_PERM_EVENTS +#undef FAN_ALL_OUTGOING_EVENTS + #endif /* _LINUX_FANOTIFY_H */ diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index ad81234d1919..d0c05de670ef 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -31,6 +31,8 @@ #define FAN_CLASS_NOTIF 0x00000000 #define FAN_CLASS_CONTENT 0x00000004 #define FAN_CLASS_PRE_CONTENT 0x00000008 + +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ FAN_CLASS_PRE_CONTENT) @@ -38,6 +40,7 @@ #define FAN_UNLIMITED_MARKS 0x00000020 #define FAN_ENABLE_AUDIT 0x00000040 +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ FAN_UNLIMITED_MARKS) @@ -57,23 +60,18 @@ #define FAN_MARK_INODE 0x00000000 #define FAN_MARK_MOUNT 0x00000010 #define FAN_MARK_FILESYSTEM 0x00000100 -#define FAN_MARK_TYPE_MASK (FAN_MARK_INODE | FAN_MARK_MOUNT | \ - FAN_MARK_FILESYSTEM) +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ FAN_MARK_DONT_FOLLOW |\ FAN_MARK_ONLYDIR |\ + FAN_MARK_MOUNT |\ FAN_MARK_IGNORED_MASK |\ FAN_MARK_IGNORED_SURV_MODIFY |\ - FAN_MARK_FLUSH|\ - FAN_MARK_TYPE_MASK) + FAN_MARK_FLUSH) -/* - * All of the events - we build the list by hand so that we can add flags in - * the future and not break backward compatibility. Apps will get only the - * events that they originally wanted. Be sure to add new events here! - */ +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_EVENTS (FAN_ACCESS |\ FAN_MODIFY |\ FAN_CLOSE |\ @@ -82,9 +80,11 @@ /* * All events which require a permission response from userspace */ +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\ FAN_ACCESS_PERM) +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ FAN_ALL_PERM_EVENTS |\ FAN_Q_OVERFLOW) From a39f7ec41766118bf836d266224505741570ada6 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Oct 2018 00:25:36 +0300 Subject: [PATCH 09/13] fsnotify: convert runtime BUG_ON() to BUILD_BUG_ON() The BUG_ON() statements to verify number of bits in ALL_FSNOTIFY_BITS and ALL_INOTIFY_BITS are converted to build time check of the constant. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 2 +- fs/notify/inotify/inotify_user.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 7391a02bf723..875975504409 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -395,7 +395,7 @@ static __init int fsnotify_init(void) { int ret; - BUG_ON(hweight32(ALL_FSNOTIFY_BITS) != 23); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index ac6978d3208c..105576daca4a 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -815,7 +815,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); - BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22); + BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22); inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC|SLAB_ACCOUNT); From bdd5a46fe30653cb4d26c7c787a22159bf79eed9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Oct 2018 00:25:37 +0300 Subject: [PATCH 10/13] fanotify: add BUILD_BUG_ON() to count the bits of fanotify constants Also define the FANOTIFY_EVENT_FLAGS consisting of the extra flags FAN_ONDIR and FAN_ON_CHILD. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 2 ++ fs/notify/fanotify/fanotify_user.c | 5 ++++- include/linux/fanotify.h | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 03498eb995be..361e3a0a445c 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -205,6 +205,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 10); + if (!fanotify_should_send_event(iter_info, mask, data, data_type)) return 0; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 530e5e486105..14594e491d2b 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -803,7 +803,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct fd f; struct path path; - u32 valid_mask = FANOTIFY_EVENTS | FAN_EVENT_ON_CHILD | FAN_ONDIR; + u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; int ret; @@ -944,6 +944,9 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, */ static int __init fanotify_user_setup(void) { + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 6); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); + fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 4519b0988afe..caf55c67fc6c 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -41,11 +41,17 @@ /* Events that require a permission response from user */ #define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM) +/* Extra flags that may be reported with event or control handling of events */ +#define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR) + /* Events that may be reported to user */ #define FANOTIFY_OUTGOING_EVENTS (FANOTIFY_EVENTS | \ FANOTIFY_PERM_EVENTS | \ FAN_Q_OVERFLOW) +#define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \ + FANOTIFY_EVENT_FLAGS) + /* Do not use these old uapi constants internally */ #undef FAN_ALL_CLASS_BITS #undef FAN_ALL_INIT_FLAGS From d0a6a87e40da49cfc7954c491d3065a25a641b29 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Oct 2018 00:25:38 +0300 Subject: [PATCH 11/13] fanotify: support reporting thread id instead of process id In order to identify which thread triggered the event in a multi-threaded program, add the FAN_REPORT_TID flag in fanotify_init to opt-in for reporting the event creator's thread id information. Signed-off-by: nixiaoming Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 9 ++++++--- fs/notify/fanotify/fanotify.h | 2 +- fs/notify/fanotify/fanotify_user.c | 4 ++-- include/linux/fanotify.h | 1 + include/uapi/linux/fanotify.h | 3 +++ 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 361e3a0a445c..5769cf3ff035 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -25,7 +25,7 @@ static bool should_merge(struct fsnotify_event *old_fsn, old = FANOTIFY_E(old_fsn); new = FANOTIFY_E(new_fsn); - if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid && + if (old_fsn->inode == new_fsn->inode && old->pid == new->pid && old->path.mnt == new->path.mnt && old->path.dentry == new->path.dentry) return true; @@ -171,7 +171,10 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, goto out; init: __maybe_unused fsnotify_init_event(&event->fse, inode, mask); - event->tgid = get_pid(task_tgid(current)); + if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) + event->pid = get_pid(task_pid(current)); + else + event->pid = get_pid(task_tgid(current)); if (path) { event->path = *path; path_get(&event->path); @@ -270,7 +273,7 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) event = FANOTIFY_E(fsn_event); path_put(&event->path); - put_pid(event->tgid); + put_pid(event->pid); if (fanotify_is_perm_event(fsn_event->mask)) { kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PE(fsn_event)); diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 88a8290a61cb..ea05b8a401e7 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -19,7 +19,7 @@ struct fanotify_event_info { * during this object's lifetime */ struct path path; - struct pid *tgid; + struct pid *pid; }; /* diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 14594e491d2b..e03be5071362 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -132,7 +132,7 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->vers = FANOTIFY_METADATA_VERSION; metadata->reserved = 0; metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS; - metadata->pid = pid_vnr(event->tgid); + metadata->pid = pid_vnr(event->pid); if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW)) metadata->fd = FAN_NOFD; else { @@ -944,7 +944,7 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, */ static int __init fanotify_user_setup(void) { - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 6); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 7); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index caf55c67fc6c..a5a60691e48b 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -19,6 +19,7 @@ FAN_CLASS_PRE_CONTENT) #define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | \ + FAN_REPORT_TID | \ FAN_CLOEXEC | FAN_NONBLOCK | \ FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS) diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index d0c05de670ef..b86740d1c50a 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -40,6 +40,9 @@ #define FAN_UNLIMITED_MARKS 0x00000020 #define FAN_ENABLE_AUDIT 0x00000040 +/* Flags to determine fanotify event format */ +#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ + /* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ From 99c228a994ec8b1580c43631866fd2c5440f5bfd Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 18 Oct 2018 14:22:55 +0300 Subject: [PATCH 12/13] fs: group frequently accessed fields of struct super_block together Kernel test robot reported [1] a 6% performance regression in a concurrent unlink(2) workload on commit 60f7ed8c7c4d ("fsnotify: send path type events to group with super block marks"). The performance test was run with no fsnotify marks at all on the data set, so the only extra instructions added by the offending commit are tests of the super_block fields s_fsnotify_{marks,mask} and these tests happen on almost every single inode access. When adding those fields to the super_block struct, we did not give much thought of placing them on a hot cache lines (we just placed them at the end of the struct). Re-organize struct super_block to try and keep some frequently accessed fields on the same cache line. Move the frequently accessed fields s_fsnotify_{marks,mask} near the frequently accessed fields s_fs_info,s_time_gran, while filling a 64bit alignment hole after s_time_gran. Move the seldom accessed fields s_id,s_uuid,s_max_links,s_mode near the seldom accessed fields s_vfs_rename_mutex,s_subtype. Rong Chen confirmed that this patch solved the reported problem. [1] https://lkml.org/lkml/2018/9/30/206 Reported-by: kernel test robot Tested-by: kernel test robot Fixes: 1e6cb72399 ("fsnotify: add super block object type") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fs.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c14801d0aa3..6da94deb957f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1393,17 +1393,26 @@ struct super_block { struct sb_writers s_writers; + /* + * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and + * s_fsnotify_marks together for cache efficiency. They are frequently + * accessed and rarely modified. + */ + void *s_fs_info; /* Filesystem private info */ + + /* Granularity of c/m/atime in ns (cannot be worse than a second) */ + u32 s_time_gran; +#ifdef CONFIG_FSNOTIFY + __u32 s_fsnotify_mask; + struct fsnotify_mark_connector __rcu *s_fsnotify_marks; +#endif + char s_id[32]; /* Informational name */ uuid_t s_uuid; /* UUID */ - void *s_fs_info; /* Filesystem private info */ unsigned int s_max_links; fmode_t s_mode; - /* Granularity of c/m/atime in ns. - Cannot be worse than a second */ - u32 s_time_gran; - /* * The next field is for VFS *only*. No filesystems have any business * even looking at it. You had been warned. @@ -1464,11 +1473,6 @@ struct super_block { spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ - -#ifdef CONFIG_FSNOTIFY - __u32 s_fsnotify_mask; - struct fsnotify_mark_connector __rcu *s_fsnotify_marks; -#endif } __randomize_layout; /* Helper functions so that in most cases filesystems will From 721fb6fbfd2132164c2e8777cc837f9b2c1794dc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 17 Oct 2018 13:07:05 +0200 Subject: [PATCH 13/13] fsnotify: Fix busy inodes during unmount Detaching of mark connector from fsnotify_put_mark() can race with unmounting of the filesystem like: CPU1 CPU2 fsnotify_put_mark() spin_lock(&conn->lock); ... inode = fsnotify_detach_connector_from_object(conn) spin_unlock(&conn->lock); generic_shutdown_super() fsnotify_unmount_inodes() sees connector detached for inode -> nothing to do evict_inode() barfs on pending inode reference iput(inode); Resulting in "Busy inodes after unmount" message and possible kernel oops. Make fsnotify_unmount_inodes() properly wait for outstanding inode references from detached connectors. Note that the accounting of outstanding inode references in the superblock can cause some cacheline contention on the counter. OTOH it happens only during deletion of the last notification mark from an inode (or during unlinking of watched inode) and that is not too bad. I have measured time to create & delete inotify watch 100000 times from 64 processes in parallel (each process having its own inotify group and its own file on a shared superblock) on a 64 CPU machine. Average and standard deviation of 15 runs look like: Avg Stddev Vanilla 9.817400 0.276165 Fixed 9.710467 0.228294 So there's no statistically significant difference. Fixes: 6b3f05d24d35 ("fsnotify: Detach mark from object list when last reference is dropped") CC: stable@vger.kernel.org Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 3 +++ fs/notify/mark.c | 39 +++++++++++++++++++++++++++++++-------- include/linux/fs.h | 3 +++ 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 875975504409..2172ba516c61 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -96,6 +96,9 @@ static void fsnotify_unmount_inodes(struct super_block *sb) if (iput_inode) iput(iput_inode); + /* Wait for outstanding inode references from connectors */ + wait_var_event(&sb->s_fsnotify_inode_refs, + !atomic_long_read(&sb->s_fsnotify_inode_refs)); } void fsnotify_sb_delete(struct super_block *sb) diff --git a/fs/notify/mark.c b/fs/notify/mark.c index b5172ccb2e60..d2dd16cb5989 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -181,17 +181,20 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) } } -static struct inode *fsnotify_detach_connector_from_object( - struct fsnotify_mark_connector *conn) +static void *fsnotify_detach_connector_from_object( + struct fsnotify_mark_connector *conn, + unsigned int *type) { struct inode *inode = NULL; + *type = conn->type; if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) return NULL; if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = fsnotify_conn_inode(conn); inode->i_fsnotify_mask = 0; + atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs); } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { @@ -215,10 +218,29 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) fsnotify_put_group(group); } +/* Drop object reference originally held by a connector */ +static void fsnotify_drop_object(unsigned int type, void *objp) +{ + struct inode *inode; + struct super_block *sb; + + if (!objp) + return; + /* Currently only inode references are passed to be dropped */ + if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE)) + return; + inode = objp; + sb = inode->i_sb; + iput(inode); + if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs)) + wake_up_var(&sb->s_fsnotify_inode_refs); +} + void fsnotify_put_mark(struct fsnotify_mark *mark) { struct fsnotify_mark_connector *conn; - struct inode *inode = NULL; + void *objp = NULL; + unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED; bool free_conn = false; /* Catch marks that were actually never attached to object */ @@ -238,7 +260,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) conn = mark->connector; hlist_del_init_rcu(&mark->obj_list); if (hlist_empty(&conn->list)) { - inode = fsnotify_detach_connector_from_object(conn); + objp = fsnotify_detach_connector_from_object(conn, &type); free_conn = true; } else { __fsnotify_recalc_mask(conn); @@ -246,7 +268,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) mark->connector = NULL; spin_unlock(&conn->lock); - iput(inode); + fsnotify_drop_object(type, objp); if (free_conn) { spin_lock(&destroy_lock); @@ -713,7 +735,8 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp) { struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark, *old_mark = NULL; - struct inode *inode; + void *objp; + unsigned int type; conn = fsnotify_grab_connector(connp); if (!conn) @@ -739,11 +762,11 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp) * mark references get dropped. It would lead to strange results such * as delaying inode deletion or blocking unmount. */ - inode = fsnotify_detach_connector_from_object(conn); + objp = fsnotify_detach_connector_from_object(conn, &type); spin_unlock(&conn->lock); if (old_mark) fsnotify_put_mark(old_mark); - iput(inode); + fsnotify_drop_object(type, objp); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 6da94deb957f..00b23b21e78a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1437,6 +1437,9 @@ struct super_block { /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; + /* Pending fsnotify inode refs */ + atomic_long_t s_fsnotify_inode_refs; + /* Being remounted read-only */ int s_readonly_remount;