From 6473ea760ca1707ade74de5b57e74189d14f8e10 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:09 +0200 Subject: [PATCH 01/17] fsnotify: tidy up FS_ and FAN_ constants Order by value, so the free value ranges are easier to find. Link: https://lore.kernel.org/r/20200319151022.31456-2-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 11 +++++------ include/uapi/linux/fanotify.h | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 1915bdba2fad..db3cabb4600e 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -49,16 +49,15 @@ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ #define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ -#define FS_ISDIR 0x40000000 /* event occurred against dir */ -#define FS_IN_ONESHOT 0x80000000 /* only send event once */ - -#define FS_DN_RENAME 0x10000000 /* file renamed */ -#define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ - /* This inode cares about things that happen to its children. Always set for * dnotify and inotify. */ #define FS_EVENT_ON_CHILD 0x08000000 +#define FS_DN_RENAME 0x10000000 /* file renamed */ +#define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ +#define FS_ISDIR 0x40000000 /* event occurred against dir */ +#define FS_IN_ONESHOT 0x80000000 /* only send event once */ + #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) /* diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index b9effa6f8503..2a1844edda47 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -25,9 +25,9 @@ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ -#define FAN_ONDIR 0x40000000 /* event occurred against dir */ +#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ -#define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ +#define FAN_ONDIR 0x40000000 /* Event occurred against dir */ /* helper events */ #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ From eae36a2b8324c9dd0a66b2ae32abd4a456e49c39 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:10 +0200 Subject: [PATCH 02/17] fsnotify: factor helpers fsnotify_dentry() and fsnotify_file() Most of the code in fsnotify hooks is boiler plate of one or the other. Link: https://lore.kernel.org/r/20200319151022.31456-3-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 99 +++++++++++++++------------------------- 1 file changed, 37 insertions(+), 62 deletions(-) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index a2d5d175d3c1..f54936aa0365 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -41,16 +41,36 @@ static inline int fsnotify_parent(const struct path *path, } /* - * Simple wrapper to consolidate calls fsnotify_parent()/fsnotify() when - * an event is on a path. + * Simple wrappers to consolidate calls fsnotify_parent()/fsnotify() when + * an event is on a file/dentry. */ -static inline int fsnotify_path(struct inode *inode, const struct path *path, - __u32 mask) +static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) { - int ret = fsnotify_parent(path, NULL, mask); + struct inode *inode = d_inode(dentry); + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + + fsnotify_parent(NULL, dentry, mask); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); +} + +static inline int fsnotify_file(struct file *file, __u32 mask) +{ + const struct path *path = &file->f_path; + struct inode *inode = file_inode(file); + int ret; + + if (file->f_mode & FMODE_NONOTIFY) + return 0; + + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + + ret = fsnotify_parent(path, NULL, mask); if (ret) return ret; + return fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); } @@ -58,19 +78,16 @@ static inline int fsnotify_path(struct inode *inode, const struct path *path, static inline int fsnotify_perm(struct file *file, int mask) { int ret; - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); __u32 fsnotify_mask = 0; - if (file->f_mode & FMODE_NONOTIFY) - return 0; if (!(mask & (MAY_READ | MAY_OPEN))) return 0; + if (mask & MAY_OPEN) { fsnotify_mask = FS_OPEN_PERM; if (file->f_flags & __FMODE_EXEC) { - ret = fsnotify_path(inode, path, FS_OPEN_EXEC_PERM); + ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); if (ret) return ret; @@ -79,10 +96,7 @@ static inline int fsnotify_perm(struct file *file, int mask) fsnotify_mask = FS_ACCESS_PERM; } - if (S_ISDIR(inode->i_mode)) - fsnotify_mask |= FS_ISDIR; - - return fsnotify_path(inode, path, fsnotify_mask); + return fsnotify_file(file, fsnotify_mask); } /* @@ -229,15 +243,7 @@ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) */ static inline void fsnotify_access(struct file *file) { - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); - __u32 mask = FS_ACCESS; - - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - if (!(file->f_mode & FMODE_NONOTIFY)) - fsnotify_path(inode, path, mask); + fsnotify_file(file, FS_ACCESS); } /* @@ -245,15 +251,7 @@ static inline void fsnotify_access(struct file *file) */ static inline void fsnotify_modify(struct file *file) { - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); - __u32 mask = FS_MODIFY; - - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - if (!(file->f_mode & FMODE_NONOTIFY)) - fsnotify_path(inode, path, mask); + fsnotify_file(file, FS_MODIFY); } /* @@ -261,16 +259,12 @@ static inline void fsnotify_modify(struct file *file) */ static inline void fsnotify_open(struct file *file) { - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); __u32 mask = FS_OPEN; - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; if (file->f_flags & __FMODE_EXEC) mask |= FS_OPEN_EXEC; - fsnotify_path(inode, path, mask); + fsnotify_file(file, mask); } /* @@ -278,16 +272,10 @@ static inline void fsnotify_open(struct file *file) */ static inline void fsnotify_close(struct file *file) { - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); - fmode_t mode = file->f_mode; - __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; + __u32 mask = (file->f_mode & FMODE_WRITE) ? FS_CLOSE_WRITE : + FS_CLOSE_NOWRITE; - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - if (!(file->f_mode & FMODE_NONOTIFY)) - fsnotify_path(inode, path, mask); + fsnotify_file(file, mask); } /* @@ -295,14 +283,7 @@ static inline void fsnotify_close(struct file *file) */ static inline void fsnotify_xattr(struct dentry *dentry) { - struct inode *inode = dentry->d_inode; - __u32 mask = FS_ATTRIB; - - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - fsnotify_parent(NULL, dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + fsnotify_dentry(dentry, FS_ATTRIB); } /* @@ -311,7 +292,6 @@ static inline void fsnotify_xattr(struct dentry *dentry) */ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) { - struct inode *inode = dentry->d_inode; __u32 mask = 0; if (ia_valid & ATTR_UID) @@ -332,13 +312,8 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) if (ia_valid & ATTR_MODE) mask |= FS_ATTRIB; - if (mask) { - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - fsnotify_parent(NULL, dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); - } + if (mask) + fsnotify_dentry(dentry, mask); } #endif /* _LINUX_FS_NOTIFY_H */ From a1aae0570a2b806937120921db2c5d3100ca55fc Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:11 +0200 Subject: [PATCH 03/17] fsnotify: funnel all dirent events through fsnotify_name() Factor out fsnotify_name() from fsnotify_dirent(), so it can also serve link and rename events and use this helper to report all directory entry change events. Both helpers return void because no caller checks their return value. Link: https://lore.kernel.org/r/20200319151022.31456-4-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index f54936aa0365..751da17e003d 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -18,16 +18,24 @@ #include /* - * Notify this @dir inode about a change in the directory entry @dentry. + * Notify this @dir inode about a change in a child directory entry. + * The directory entry may have turned positive or negative or its inode may + * have changed (i.e. renamed over). * * Unlike fsnotify_parent(), the event will be reported regardless of the * FS_EVENT_ON_CHILD mask on the parent inode. */ -static inline int fsnotify_dirent(struct inode *dir, struct dentry *dentry, - __u32 mask) +static inline void fsnotify_name(struct inode *dir, __u32 mask, + struct inode *child, + const struct qstr *name, u32 cookie) { - return fsnotify(dir, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, - &dentry->d_name, 0); + fsnotify(dir, mask, child, FSNOTIFY_EVENT_INODE, name, cookie); +} + +static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, + __u32 mask) +{ + fsnotify_name(dir, mask, d_inode(dentry), &dentry->d_name, 0); } /* Notify this dentry's parent about a child's events. */ @@ -136,10 +144,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, mask |= FS_ISDIR; } - fsnotify(old_dir, old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_name, - fs_cookie); - fsnotify(new_dir, new_dir_mask, source, FSNOTIFY_EVENT_INODE, new_name, - fs_cookie); + fsnotify_name(old_dir, old_dir_mask, source, old_name, fs_cookie); + fsnotify_name(new_dir, new_dir_mask, source, new_name, fs_cookie); if (target) fsnotify_link_count(target); @@ -194,12 +200,13 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) * Note: We have to pass also the linked inode ptr as some filesystems leave * new_dentry->d_inode NULL and instantiate inode pointer later */ -static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry) +static inline void fsnotify_link(struct inode *dir, struct inode *inode, + struct dentry *new_dentry) { fsnotify_link_count(inode); audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, &new_dentry->d_name, 0); + fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0); } /* From aa93bdc5500cc93ba31afeda1a61610d117947ad Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:12 +0200 Subject: [PATCH 04/17] fsnotify: use helpers to access data by data_type Create helpers to access path and inode from different data types. Link: https://lore.kernel.org/r/20200319151022.31456-5-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 18 +++++++-------- fs/notify/fsnotify.c | 5 ++-- fs/notify/inotify/inotify_fsnotify.c | 8 +++---- include/linux/fsnotify_backend.h | 34 ++++++++++++++++++++++++---- kernel/audit_fsnotify.c | 13 ++--------- kernel/audit_watch.c | 16 ++----------- 6 files changed, 48 insertions(+), 46 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 5778d1347b35..19ec7a4f4d50 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -151,7 +151,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, { __u32 marks_mask = 0, marks_ignored_mask = 0; __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS; - const struct path *path = data; + const struct path *path = fsnotify_data_path(data, data_type); struct fsnotify_mark *mark; int type; @@ -160,7 +160,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, if (!FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { /* Do we have path to open a file descriptor? */ - if (data_type != FSNOTIFY_EVENT_PATH) + if (!path) return 0; /* Path type events are only relevant for files and dirs */ if (!d_is_reg(path->dentry) && !d_can_lookup(path->dentry)) @@ -269,11 +269,8 @@ static struct inode *fanotify_fid_inode(struct inode *to_tell, u32 event_mask, { if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) return to_tell; - else if (data_type == FSNOTIFY_EVENT_INODE) - return (struct inode *)data; - else if (data_type == FSNOTIFY_EVENT_PATH) - return d_inode(((struct path *)data)->dentry); - return NULL; + + return (struct inode *)fsnotify_data_inode(data, data_type); } struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, @@ -284,6 +281,7 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, struct fanotify_event *event = NULL; gfp_t gfp = GFP_KERNEL_ACCOUNT; struct inode *id = fanotify_fid_inode(inode, mask, data, data_type); + const struct path *path = fsnotify_data_path(data, data_type); /* * For queues with unlimited length lost events are not expected and @@ -324,10 +322,10 @@ init: __maybe_unused if (id && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { /* Report the event without a file identifier on encode error */ event->fh_type = fanotify_encode_fid(event, id, gfp, fsid); - } else if (data_type == FSNOTIFY_EVENT_PATH) { + } else if (path) { event->fh_type = FILEID_ROOT; - event->path = *((struct path *)data); - path_get(&event->path); + event->path = *path; + path_get(path); } else { event->fh_type = FILEID_INVALID; event->path.mnt = NULL; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 46f225580009..a5d6467f89a0 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -318,6 +318,7 @@ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, const struct qstr *file_name, u32 cookie) { + const struct path *path = fsnotify_data_path(data, data_is); struct fsnotify_iter_info iter_info = {}; struct super_block *sb = to_tell->i_sb; struct mount *mnt = NULL; @@ -325,8 +326,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, int ret = 0; __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); - if (data_is == FSNOTIFY_EVENT_PATH) { - mnt = real_mount(((const struct path *)data)->mnt); + if (path) { + mnt = real_mount(path->mnt); mnt_or_sb_mask |= mnt->mnt_fsnotify_mask; } /* An event "on child" is not intended for a mount/sb mark */ diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index d510223d302c..6bb98522bbfd 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -61,6 +61,7 @@ int inotify_handle_event(struct fsnotify_group *group, const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { + const struct path *path = fsnotify_data_path(data, data_type); struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); struct inotify_inode_mark *i_mark; struct inotify_event_info *event; @@ -73,12 +74,9 @@ int inotify_handle_event(struct fsnotify_group *group, return 0; if ((inode_mark->mask & FS_EXCL_UNLINK) && - (data_type == FSNOTIFY_EVENT_PATH)) { - const struct path *path = data; + path && d_unlinked(path->dentry)) + return 0; - if (d_unlinked(path->dentry)) - return 0; - } if (file_name) { len = file_name->len; alloc_len += len + 1; diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index db3cabb4600e..5cc838db422a 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -212,10 +212,36 @@ struct fsnotify_group { }; }; -/* when calling fsnotify tell it if the data is a path or inode */ -#define FSNOTIFY_EVENT_NONE 0 -#define FSNOTIFY_EVENT_PATH 1 -#define FSNOTIFY_EVENT_INODE 2 +/* When calling fsnotify tell it if the data is a path or inode */ +enum fsnotify_data_type { + FSNOTIFY_EVENT_NONE, + FSNOTIFY_EVENT_PATH, + FSNOTIFY_EVENT_INODE, +}; + +static inline const struct inode *fsnotify_data_inode(const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_INODE: + return data; + case FSNOTIFY_EVENT_PATH: + return d_inode(((const struct path *)data)->dentry); + default: + return NULL; + } +} + +static inline const struct path *fsnotify_data_path(const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_PATH: + return data; + default: + return NULL; + } +} enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index f0d243318452..3596448bfdab 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -160,23 +160,14 @@ static int audit_mark_handle_event(struct fsnotify_group *group, { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); struct audit_fsnotify_mark *audit_mark; - const struct inode *inode = NULL; + const struct inode *inode = fsnotify_data_inode(data, data_type); audit_mark = container_of(inode_mark, struct audit_fsnotify_mark, mark); BUG_ON(group != audit_fsnotify_group); - switch (data_type) { - case (FSNOTIFY_EVENT_PATH): - inode = ((const struct path *)data)->dentry->d_inode; - break; - case (FSNOTIFY_EVENT_INODE): - inode = (const struct inode *)data; - break; - default: - BUG(); + if (WARN_ON(!inode)) return 0; - } if (mask & (FS_CREATE|FS_MOVED_TO|FS_DELETE|FS_MOVED_FROM)) { if (audit_compare_dname_path(dname, audit_mark->path, AUDIT_NAME_FULL)) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 4508d5e0cf69..dcfbb44c6720 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -473,25 +473,13 @@ static int audit_watch_handle_event(struct fsnotify_group *group, struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); - const struct inode *inode; + const struct inode *inode = fsnotify_data_inode(data, data_type); struct audit_parent *parent; parent = container_of(inode_mark, struct audit_parent, mark); BUG_ON(group != audit_watch_group); - - switch (data_type) { - case (FSNOTIFY_EVENT_PATH): - inode = d_backing_inode(((const struct path *)data)->dentry); - break; - case (FSNOTIFY_EVENT_INODE): - inode = (const struct inode *)data; - break; - default: - BUG(); - inode = NULL; - break; - } + WARN_ON(!inode); if (mask & (FS_CREATE|FS_MOVED_TO) && inode) audit_update_watch(parent, dname, inode->i_sb->s_dev, inode->i_ino, 0); From 017de65fe58f2b0ca428b5830609520ded5898b9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:13 +0200 Subject: [PATCH 05/17] fsnotify: simplify arguments passing to fsnotify_parent() Instead of passing both dentry and path and having to figure out which one to use, pass data/data_type to simplify the code. Link: https://lore.kernel.org/r/20200319151022.31456-6-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 15 ++++----------- include/linux/fsnotify.h | 14 ++------------ include/linux/fsnotify_backend.h | 14 ++++++++------ 3 files changed, 14 insertions(+), 29 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index a5d6467f89a0..193530f57963 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -143,15 +143,13 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) } /* Notify this dentry's parent about a child's events. */ -int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask) +int fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, + int data_type) { struct dentry *parent; struct inode *p_inode; int ret = 0; - if (!dentry) - dentry = path->dentry; - if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) return 0; @@ -168,12 +166,7 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask mask |= FS_EVENT_ON_CHILD; take_dentry_name_snapshot(&name, dentry); - if (path) - ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, - &name.name, 0); - else - ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, - &name.name, 0); + ret = fsnotify(p_inode, mask, data, data_type, &name.name, 0); release_dentry_name_snapshot(&name); } @@ -181,7 +174,7 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask return ret; } -EXPORT_SYMBOL_GPL(__fsnotify_parent); +EXPORT_SYMBOL_GPL(fsnotify_parent); static int send_to_group(struct inode *to_tell, __u32 mask, const void *data, diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 751da17e003d..860018f3e545 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -38,16 +38,6 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, fsnotify_name(dir, mask, d_inode(dentry), &dentry->d_name, 0); } -/* Notify this dentry's parent about a child's events. */ -static inline int fsnotify_parent(const struct path *path, - struct dentry *dentry, __u32 mask) -{ - if (!dentry) - dentry = path->dentry; - - return __fsnotify_parent(path, dentry, mask); -} - /* * Simple wrappers to consolidate calls fsnotify_parent()/fsnotify() when * an event is on a file/dentry. @@ -59,7 +49,7 @@ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - fsnotify_parent(NULL, dentry, mask); + fsnotify_parent(dentry, mask, inode, FSNOTIFY_EVENT_INODE); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } @@ -75,7 +65,7 @@ static inline int fsnotify_file(struct file *file, __u32 mask) if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - ret = fsnotify_parent(path, NULL, mask); + ret = fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); if (ret) return ret; diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 5cc838db422a..337c87cf34d6 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -376,9 +376,10 @@ struct fsnotify_mark { /* called from the vfs helpers */ /* main fsnotify call to send events */ -extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, - const struct qstr *name, u32 cookie); -extern int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask); +extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, + int data_type, const struct qstr *name, u32 cookie); +extern int fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, + int data_type); extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); @@ -533,13 +534,14 @@ static inline void fsnotify_init_event(struct fsnotify_event *event, #else -static inline int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, - const struct qstr *name, u32 cookie) +static inline int fsnotify(struct inode *to_tell, __u32 mask, const void *data, + int data_type, const struct qstr *name, u32 cookie) { return 0; } -static inline int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask) +static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, + const void *data, int data_type) { return 0; } From dfc2d2594e4a79204a3967585245f00644b8f838 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:15 +0200 Subject: [PATCH 06/17] fsnotify: replace inode pointer with an object id The event inode field is used only for comparison in queue merges and cannot be dereferenced after handle_event(), because it does not hold a refcount on the inode. Replace it with an abstract id to do the same thing. Link: https://lore.kernel.org/r/20200319151022.31456-8-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 4 ++-- fs/notify/inotify/inotify_fsnotify.c | 4 ++-- fs/notify/inotify/inotify_user.c | 2 +- include/linux/fsnotify_backend.h | 7 +++---- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 19ec7a4f4d50..6a202aaf941f 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -26,7 +26,7 @@ static bool should_merge(struct fsnotify_event *old_fsn, old = FANOTIFY_E(old_fsn); new = FANOTIFY_E(new_fsn); - if (old_fsn->inode != new_fsn->inode || old->pid != new->pid || + if (old_fsn->objectid != new_fsn->objectid || old->pid != new->pid || old->fh_type != new->fh_type || old->fh_len != new->fh_len) return false; @@ -312,7 +312,7 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, if (!event) goto out; init: __maybe_unused - fsnotify_init_event(&event->fse, inode); + fsnotify_init_event(&event->fse, (unsigned long)inode); event->mask = mask; if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) event->pid = get_pid(task_pid(current)); diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 6bb98522bbfd..2ebc89047153 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -39,7 +39,7 @@ static bool event_compare(struct fsnotify_event *old_fsn, if (old->mask & FS_IN_IGNORED) return false; if ((old->mask == new->mask) && - (old_fsn->inode == new_fsn->inode) && + (old_fsn->objectid == new_fsn->objectid) && (old->name_len == new->name_len) && (!old->name_len || !strcmp(old->name, new->name))) return true; @@ -116,7 +116,7 @@ int inotify_handle_event(struct fsnotify_group *group, mask &= ~IN_ISDIR; fsn_event = &event->fse; - fsnotify_init_event(fsn_event, inode); + fsnotify_init_event(fsn_event, (unsigned long)inode); event->mask = mask; event->wd = i_mark->wd; event->sync_cookie = cookie; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 107537a543fd..81ffc8629fc4 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -635,7 +635,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) return ERR_PTR(-ENOMEM); } group->overflow_event = &oevent->fse; - fsnotify_init_event(group->overflow_event, NULL); + fsnotify_init_event(group->overflow_event, 0); oevent->mask = FS_Q_OVERFLOW; oevent->wd = -1; oevent->sync_cookie = 0; diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 337c87cf34d6..c72cbea20ef7 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -132,8 +132,7 @@ struct fsnotify_ops { */ struct fsnotify_event { struct list_head list; - /* inode may ONLY be dereferenced during handle_event(). */ - struct inode *inode; /* either the inode the event happened to or its parent */ + unsigned long objectid; /* identifier for queue merges */ }; /* @@ -526,10 +525,10 @@ extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); static inline void fsnotify_init_event(struct fsnotify_event *event, - struct inode *inode) + unsigned long objectid) { INIT_LIST_HEAD(&event->list); - event->inode = inode; + event->objectid = objectid; } #else From f367a62a7cad2447d835a9f14fc63997a9137246 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:16 +0200 Subject: [PATCH 07/17] fanotify: merge duplicate events on parent and child With inotify, when a watch is set on a directory and on its child, an event on the child is reported twice, once with wd of the parent watch and once with wd of the child watch without the filename. With fanotify, when a watch is set on a directory and on its child, an event on the child is reported twice, but it has the exact same information - either an open file descriptor of the child or an encoded fid of the child. The reason that the two identical events are not merged is because the object id used for merging events in the queue is the child inode in one event and parent inode in the other. For events with path or dentry data, use the victim inode instead of the watched inode as the object id for event merging, so that the event reported on parent will be merged with the event reported on the child. Link: https://lore.kernel.org/r/20200319151022.31456-9-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 6a202aaf941f..97d34b958761 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -312,7 +312,12 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, if (!event) goto out; init: __maybe_unused - fsnotify_init_event(&event->fse, (unsigned long)inode); + /* + * Use the victim inode instead of the watching inode as the id for + * event queue, so event reported on parent is merged with event + * reported on child when both directory and child watches exist. + */ + fsnotify_init_event(&event->fse, (unsigned long)id); event->mask = mask; if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) event->pid = get_pid(task_pid(current)); From 55bf882c7f13dda8bbe624040c6d5b4fbb812d16 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:17 +0200 Subject: [PATCH 08/17] fanotify: fix merging marks masks with FAN_ONDIR Change the logic of FAN_ONDIR in two ways that are similar to the logic of FAN_EVENT_ON_CHILD, that was fixed in commit 54a307ba8d3c ("fanotify: fix logic of events on child"): 1. The flag is meaningless in ignore mask 2. The flag refers only to events in the mask of the mark where it is set This is what the fanotify_mark.2 man page says about FAN_ONDIR: "Without this flag, only events for files are created." It doesn't say anything about setting this flag in ignore mask to stop getting events on directories nor can I think of any setup where this capability would be useful. Currently, when marks masks are merged, the FAN_ONDIR flag set in one mark affects the events that are set in another mark's mask and this behavior causes unexpected results. For example, a user adds a mark on a directory with mask FAN_ATTRIB | FAN_ONDIR and a mount mark with mask FAN_OPEN (without FAN_ONDIR). An opendir() of that directory (which is inside that mount) generates a FAN_OPEN event even though neither of the marks requested to get open events on directories. Link: https://lore.kernel.org/r/20200319151022.31456-10-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 97d34b958761..960f4f4d9e8f 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -171,6 +171,13 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, if (!fsnotify_iter_should_report_type(iter_info, type)) continue; mark = iter_info->marks[type]; + /* + * If the event is on dir and this mark doesn't care about + * events on dir, don't send it! + */ + if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR)) + continue; + /* * If the event is for a child and this mark doesn't care about * events on a child, don't send it! @@ -203,10 +210,6 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, user_mask &= ~FAN_ONDIR; } - if (event_mask & FS_ISDIR && - !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) - return 0; - return test_mask & user_mask; } From a741c2febeadc675aef84bcd6924b6522577d593 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 Mar 2020 15:27:52 +0100 Subject: [PATCH 09/17] fanotify: Simplify create_fd() create_fd() is never used with invalid path. Also the only thing it needs to know from fanotify_event is the path. Simplify the function to take path directly and assume it is correct. Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 0aa362b88550..e48fc07d80ef 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -96,15 +96,12 @@ out: return fsn_event; } -static int create_fd(struct fsnotify_group *group, - struct fanotify_event *event, +static int create_fd(struct fsnotify_group *group, struct path *path, struct file **file) { int client_fd; struct file *new_file; - pr_debug("%s: group=%p event=%p\n", __func__, group, event); - client_fd = get_unused_fd_flags(group->fanotify_data.f_flags); if (client_fd < 0) return client_fd; @@ -113,14 +110,9 @@ static int create_fd(struct fsnotify_group *group, * we need a new file handle for the userspace program so it can read even if it was * originally opened O_WRONLY. */ - /* it's possible this event was an overflow event. in that case dentry and mnt - * are NULL; That's fine, just don't call dentry open */ - if (event->path.dentry && event->path.mnt) - new_file = dentry_open(&event->path, - group->fanotify_data.f_flags | FMODE_NONOTIFY, - current_cred()); - else - new_file = ERR_PTR(-EOVERFLOW); + new_file = dentry_open(path, + group->fanotify_data.f_flags | FMODE_NONOTIFY, + current_cred()); if (IS_ERR(new_file)) { /* * we still send an event even if we can't open the file. this @@ -276,9 +268,13 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, metadata.pid = pid_vnr(event->pid); if (fanotify_event_has_path(event)) { - fd = create_fd(group, event, &f); - if (fd < 0) - return fd; + struct path *path = &event->path; + + if (path->mnt && path->dentry) { + fd = create_fd(group, path, &f); + if (fd < 0) + return fd; + } } else if (fanotify_event_has_fid(event)) { metadata.event_len += fanotify_event_info_len(event); } From afc894c784c84cb3bb85a235feca2cb278f7b023 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 Mar 2020 16:55:37 +0100 Subject: [PATCH 10/17] fanotify: Store fanotify handles differently Currently, struct fanotify_fid groups fsid and file handle and is unioned together with struct path to save space. Also there is fh_type and fh_len directly in struct fanotify_event to avoid padding overhead. In the follwing patches, we will be adding more event types and this packing makes code difficult to follow. So unpack everything and create struct fanotify_fh which groups members logically related to file handle to make code easier to follow. In the following patch we will pack things again differently to make events smaller. Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 97 +++++++++++++++--------- fs/notify/fanotify/fanotify.h | 115 ++++++++++++++++------------- fs/notify/fanotify/fanotify_user.c | 39 +++++----- 3 files changed, 145 insertions(+), 106 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 960f4f4d9e8f..64b05be4058d 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -17,6 +17,31 @@ #include "fanotify.h" +static bool fanotify_path_equal(struct path *p1, struct path *p2) +{ + return p1->mnt == p2->mnt && p1->dentry == p2->dentry; +} + +static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1, + __kernel_fsid_t *fsid2) +{ + return fsid1->val[0] == fsid1->val[0] && fsid2->val[1] == fsid2->val[1]; +} + +static bool fanotify_fh_equal(struct fanotify_fh *fh1, + struct fanotify_fh *fh2) +{ + if (fh1->type != fh2->type || fh1->len != fh2->len) + return false; + + /* Do not merge events if we failed to encode fh */ + if (fh1->type == FILEID_INVALID) + return false; + + return !fh1->len || + !memcmp(fanotify_fh_buf(fh1), fanotify_fh_buf(fh2), fh1->len); +} + static bool should_merge(struct fsnotify_event *old_fsn, struct fsnotify_event *new_fsn) { @@ -27,12 +52,12 @@ static bool should_merge(struct fsnotify_event *old_fsn, new = FANOTIFY_E(new_fsn); if (old_fsn->objectid != new_fsn->objectid || old->pid != new->pid || - old->fh_type != new->fh_type || old->fh_len != new->fh_len) + old->fh.type != new->fh.type) return false; if (fanotify_event_has_path(old)) { - return old->path.mnt == new->path.mnt && - old->path.dentry == new->path.dentry; + return fanotify_path_equal(fanotify_event_path(old), + fanotify_event_path(new)); } else if (fanotify_event_has_fid(old)) { /* * We want to merge many dirent events in the same dir (i.e. @@ -42,8 +67,11 @@ static bool should_merge(struct fsnotify_event *old_fsn, * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+ * unlink pair or rmdir+create pair of events. */ - return (old->mask & FS_ISDIR) == (new->mask & FS_ISDIR) && - fanotify_fid_equal(&old->fid, &new->fid, old->fh_len); + if ((old->mask & FS_ISDIR) != (new->mask & FS_ISDIR)) + return false; + + return fanotify_fsid_equal(&old->fsid, &new->fsid) && + fanotify_fh_equal(&old->fh, &new->fh); } /* Do not merge events if we failed to encode fid */ @@ -213,15 +241,14 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, return test_mask & user_mask; } -static int fanotify_encode_fid(struct fanotify_event *event, - struct inode *inode, gfp_t gfp, - __kernel_fsid_t *fsid) +static void fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, + gfp_t gfp) { - struct fanotify_fid *fid = &event->fid; - int dwords, bytes = 0; - int err, type; + int dwords, type, bytes = 0; + char *ext_buf = NULL; + void *buf = fh->buf; + int err; - fid->ext_fh = NULL; dwords = 0; err = -ENOENT; type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL); @@ -232,31 +259,32 @@ static int fanotify_encode_fid(struct fanotify_event *event, if (bytes > FANOTIFY_INLINE_FH_LEN) { /* Treat failure to allocate fh as failure to allocate event */ err = -ENOMEM; - fid->ext_fh = kmalloc(bytes, gfp); - if (!fid->ext_fh) + ext_buf = kmalloc(bytes, gfp); + if (!ext_buf) goto out_err; + + *fanotify_fh_ext_buf_ptr(fh) = ext_buf; + buf = ext_buf; } - type = exportfs_encode_inode_fh(inode, fanotify_fid_fh(fid, bytes), - &dwords, NULL); + type = exportfs_encode_inode_fh(inode, buf, &dwords, NULL); err = -EINVAL; if (!type || type == FILEID_INVALID || bytes != dwords << 2) goto out_err; - fid->fsid = *fsid; - event->fh_len = bytes; + fh->type = type; + fh->len = bytes; - return type; + return; out_err: - pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, " - "type=%d, bytes=%d, err=%i)\n", - fsid->val[0], fsid->val[1], type, bytes, err); - kfree(fid->ext_fh); - fid->ext_fh = NULL; - event->fh_len = 0; - - return FILEID_INVALID; + pr_warn_ratelimited("fanotify: failed to encode fid (type=%d, len=%d, err=%i)\n", + type, bytes, err); + kfree(ext_buf); + *fanotify_fh_ext_buf_ptr(fh) = NULL; + /* Report the event without a file identifier on encode error */ + fh->type = FILEID_INVALID; + fh->len = 0; } /* @@ -326,16 +354,17 @@ init: __maybe_unused event->pid = get_pid(task_pid(current)); else event->pid = get_pid(task_tgid(current)); - event->fh_len = 0; + event->fh.len = 0; if (id && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { - /* Report the event without a file identifier on encode error */ - event->fh_type = fanotify_encode_fid(event, id, gfp, fsid); + event->fsid = *fsid; + if (id) + fanotify_encode_fh(&event->fh, id, gfp); } else if (path) { - event->fh_type = FILEID_ROOT; + event->fh.type = FILEID_ROOT; event->path = *path; path_get(path); } else { - event->fh_type = FILEID_INVALID; + event->fh.type = FILEID_INVALID; event->path.mnt = NULL; event->path.dentry = NULL; } @@ -483,8 +512,8 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) event = FANOTIFY_E(fsn_event); if (fanotify_event_has_path(event)) path_put(&event->path); - else if (fanotify_event_has_ext_fh(event)) - kfree(event->fid.ext_fh); + else if (fanotify_fh_has_ext_buf(&event->fh)) + kfree(fanotify_fh_ext_buf(&event->fh)); put_pid(event->pid); if (fanotify_is_perm_event(event->mask)) { kmem_cache_free(fanotify_perm_event_cachep, diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 68b30504284c..f9da4481613d 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -18,39 +18,37 @@ enum { /* * 3 dwords are sufficient for most local fs (64bit ino, 32bit generation). - * For 32bit arch, fid increases the size of fanotify_event by 12 bytes and - * fh_* fields increase the size of fanotify_event by another 4 bytes. - * For 64bit arch, fid increases the size of fanotify_fid by 8 bytes and - * fh_* fields are packed in a hole after mask. + * fh buf should be dword aligned. On 64bit arch, the ext_buf pointer is + * stored in either the first or last 2 dwords. */ -#if BITS_PER_LONG == 32 #define FANOTIFY_INLINE_FH_LEN (3 << 2) -#else -#define FANOTIFY_INLINE_FH_LEN (4 << 2) -#endif -struct fanotify_fid { - __kernel_fsid_t fsid; - union { - unsigned char fh[FANOTIFY_INLINE_FH_LEN]; - unsigned char *ext_fh; - }; -}; +struct fanotify_fh { + unsigned char buf[FANOTIFY_INLINE_FH_LEN]; + u8 type; + u8 len; +} __aligned(4); -static inline void *fanotify_fid_fh(struct fanotify_fid *fid, - unsigned int fh_len) +static inline bool fanotify_fh_has_ext_buf(struct fanotify_fh *fh) { - return fh_len <= FANOTIFY_INLINE_FH_LEN ? fid->fh : fid->ext_fh; + return fh->len > FANOTIFY_INLINE_FH_LEN; } -static inline bool fanotify_fid_equal(struct fanotify_fid *fid1, - struct fanotify_fid *fid2, - unsigned int fh_len) +static inline char **fanotify_fh_ext_buf_ptr(struct fanotify_fh *fh) { - return fid1->fsid.val[0] == fid2->fsid.val[0] && - fid1->fsid.val[1] == fid2->fsid.val[1] && - !memcmp(fanotify_fid_fh(fid1, fh_len), - fanotify_fid_fh(fid2, fh_len), fh_len); + BUILD_BUG_ON(__alignof__(char *) - 4 + sizeof(char *) > + FANOTIFY_INLINE_FH_LEN); + return (char **)ALIGN((unsigned long)(fh->buf), __alignof__(char *)); +} + +static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh) +{ + return *fanotify_fh_ext_buf_ptr(fh); +} + +static inline void *fanotify_fh_buf(struct fanotify_fh *fh) +{ + return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh->buf; } /* @@ -62,50 +60,53 @@ struct fanotify_event { struct fsnotify_event fse; u32 mask; /* - * Those fields are outside fanotify_fid to pack fanotify_event nicely - * on 64bit arch and to use fh_type as an indication of whether path - * or fid are used in the union: - * FILEID_ROOT (0) for path, > 0 for fid, FILEID_INVALID for neither. + * With FAN_REPORT_FID, we do not hold any reference on the + * victim object. Instead we store its NFS file handle and its + * filesystem's fsid as a unique identifier. */ - u8 fh_type; - u8 fh_len; - u16 pad; - union { - /* - * We hold ref to this path so it may be dereferenced at any - * point during this object's lifetime - */ - struct path path; - /* - * With FAN_REPORT_FID, we do not hold any reference on the - * victim object. Instead we store its NFS file handle and its - * filesystem's fsid as a unique identifier. - */ - struct fanotify_fid fid; - }; + __kernel_fsid_t fsid; + struct fanotify_fh fh; + /* + * We hold ref to this path so it may be dereferenced at any + * point during this object's lifetime + */ + struct path path; struct pid *pid; }; static inline bool fanotify_event_has_path(struct fanotify_event *event) { - return event->fh_type == FILEID_ROOT; + return event->fh.type == FILEID_ROOT; } static inline bool fanotify_event_has_fid(struct fanotify_event *event) { - return event->fh_type != FILEID_ROOT && - event->fh_type != FILEID_INVALID; + return event->fh.type != FILEID_ROOT && + event->fh.type != FILEID_INVALID; } -static inline bool fanotify_event_has_ext_fh(struct fanotify_event *event) +static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event) { - return fanotify_event_has_fid(event) && - event->fh_len > FANOTIFY_INLINE_FH_LEN; + if (fanotify_event_has_fid(event)) + return &event->fsid; + else + return NULL; } -static inline void *fanotify_event_fh(struct fanotify_event *event) +static inline struct fanotify_fh *fanotify_event_object_fh( + struct fanotify_event *event) { - return fanotify_fid_fh(&event->fid, event->fh_len); + if (fanotify_event_has_fid(event)) + return &event->fh; + else + return NULL; +} + +static inline int fanotify_event_object_fh_len(struct fanotify_event *event) +{ + struct fanotify_fh *fh = fanotify_event_object_fh(event); + + return fh ? fh->len : 0; } /* @@ -139,6 +140,14 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) return container_of(fse, struct fanotify_event, fse); } +static inline struct path *fanotify_event_path(struct fanotify_event *event) +{ + if (fanotify_event_has_path(event)) + return &event->path; + else + return NULL; +} + struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index e48fc07d80ef..0b3b74fa3a27 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -53,11 +53,13 @@ struct kmem_cache *fanotify_perm_event_cachep __read_mostly; static int fanotify_event_info_len(struct fanotify_event *event) { - if (!fanotify_event_has_fid(event)) + int fh_len = fanotify_event_object_fh_len(event); + + if (!fh_len) return 0; return roundup(sizeof(struct fanotify_event_info_fid) + - sizeof(struct file_handle) + event->fh_len, + sizeof(struct file_handle) + fh_len, FANOTIFY_EVENT_ALIGN); } @@ -200,8 +202,9 @@ static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) { struct fanotify_event_info_fid info = { }; struct file_handle handle = { }; - unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh; - size_t fh_len = event->fh_len; + unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh_buf; + struct fanotify_fh *fh = fanotify_event_object_fh(event); + size_t fh_len = fh->len; size_t len = fanotify_event_info_len(event); if (!len) @@ -213,13 +216,13 @@ static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) /* Copy event info fid header followed by vaiable sized file handle */ info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID; info.hdr.len = len; - info.fsid = event->fid.fsid; + info.fsid = *fanotify_event_fsid(event); if (copy_to_user(buf, &info, sizeof(info))) return -EFAULT; buf += sizeof(info); len -= sizeof(info); - handle.handle_type = event->fh_type; + handle.handle_type = fh->type; handle.handle_bytes = fh_len; if (copy_to_user(buf, &handle, sizeof(handle))) return -EFAULT; @@ -230,12 +233,12 @@ static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) * For an inline fh, copy through stack to exclude the copy from * usercopy hardening protections. */ - fh = fanotify_event_fh(event); + fh_buf = fanotify_fh_buf(fh); if (fh_len <= FANOTIFY_INLINE_FH_LEN) { - memcpy(bounce, fh, fh_len); - fh = bounce; + memcpy(bounce, fh_buf, fh_len); + fh_buf = bounce; } - if (copy_to_user(buf, fh, fh_len)) + if (copy_to_user(buf, fh_buf, fh_len)) return -EFAULT; /* Pad with 0's */ @@ -254,12 +257,14 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, { struct fanotify_event_metadata metadata; struct fanotify_event *event; + struct path *path; struct file *f = NULL; int ret, fd = FAN_NOFD; pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event); event = container_of(fsn_event, struct fanotify_event, fse); + path = fanotify_event_path(event); metadata.event_len = FAN_EVENT_METADATA_LEN; metadata.metadata_len = FAN_EVENT_METADATA_LEN; metadata.vers = FANOTIFY_METADATA_VERSION; @@ -267,16 +272,12 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; metadata.pid = pid_vnr(event->pid); - if (fanotify_event_has_path(event)) { - struct path *path = &event->path; - - if (path->mnt && path->dentry) { - fd = create_fd(group, path, &f); - if (fd < 0) - return fd; - } - } else if (fanotify_event_has_fid(event)) { + if (fanotify_event_has_fid(event)) { metadata.event_len += fanotify_event_info_len(event); + } else if (path && path->mnt && path->dentry) { + fd = create_fd(group, path, &f); + if (fd < 0) + return fd; } metadata.fd = fd; From 7088f35720a55b99624ea36091538baec7ec611f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 Mar 2020 17:04:20 +0100 Subject: [PATCH 11/17] fanotify: divorce fanotify_path_event and fanotify_fid_event Breakup the union and make them both inherit from abstract fanotify_event. fanotify_path_event, fanotify_fid_event and fanotify_perm_event inherit from fanotify_event. type field in abstract fanotify_event determines the concrete event type. fanotify_path_event, fanotify_fid_event and fanotify_perm_event are allocated from separate memcache pools. Rename fanotify_perm_event casting macro to FANOTIFY_PERM(), so that FANOTIFY_PE() and FANOTIFY_FE() can be used as casting macros to fanotify_path_event and fanotify_fid_event. [JK: Cleanup FANOTIFY_PE() and FANOTIFY_FE() to be proper inline functions and remove requirement that fanotify_event is the first in event structures] Link: https://lore.kernel.org/r/20200319151022.31456-11-amir73il@gmail.com Suggested-by: Jan Kara Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 126 +++++++++++++++++++++-------- fs/notify/fanotify/fanotify.h | 77 +++++++++++------- fs/notify/fanotify/fanotify_user.c | 71 ++++++++-------- 3 files changed, 180 insertions(+), 94 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 64b05be4058d..39eb71f7c413 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -29,7 +29,7 @@ static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1, } static bool fanotify_fh_equal(struct fanotify_fh *fh1, - struct fanotify_fh *fh2) + struct fanotify_fh *fh2) { if (fh1->type != fh2->type || fh1->len != fh2->len) return false; @@ -42,6 +42,17 @@ static bool fanotify_fh_equal(struct fanotify_fh *fh1, !memcmp(fanotify_fh_buf(fh1), fanotify_fh_buf(fh2), fh1->len); } +static bool fanotify_fid_event_equal(struct fanotify_fid_event *ffe1, + struct fanotify_fid_event *ffe2) +{ + /* Do not merge fid events without object fh */ + if (!ffe1->object_fh.len) + return false; + + return fanotify_fsid_equal(&ffe1->fsid, &ffe2->fsid) && + fanotify_fh_equal(&ffe1->object_fh, &ffe2->object_fh); +} + static bool should_merge(struct fsnotify_event *old_fsn, struct fsnotify_event *new_fsn) { @@ -51,14 +62,15 @@ static bool should_merge(struct fsnotify_event *old_fsn, old = FANOTIFY_E(old_fsn); new = FANOTIFY_E(new_fsn); - if (old_fsn->objectid != new_fsn->objectid || old->pid != new->pid || - old->fh.type != new->fh.type) + if (old_fsn->objectid != new_fsn->objectid || + old->type != new->type || old->pid != new->pid) return false; - if (fanotify_event_has_path(old)) { + switch (old->type) { + case FANOTIFY_EVENT_TYPE_PATH: return fanotify_path_equal(fanotify_event_path(old), fanotify_event_path(new)); - } else if (fanotify_event_has_fid(old)) { + case FANOTIFY_EVENT_TYPE_FID: /* * We want to merge many dirent events in the same dir (i.e. * creates/unlinks/renames), but we do not want to merge dirent @@ -70,11 +82,12 @@ static bool should_merge(struct fsnotify_event *old_fsn, if ((old->mask & FS_ISDIR) != (new->mask & FS_ISDIR)) return false; - return fanotify_fsid_equal(&old->fsid, &new->fsid) && - fanotify_fh_equal(&old->fh, &new->fh); + return fanotify_fid_event_equal(FANOTIFY_FE(old), + FANOTIFY_FE(new)); + default: + WARN_ON_ONCE(1); } - /* Do not merge events if we failed to encode fid */ return false; } @@ -310,6 +323,7 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, __kernel_fsid_t *fsid) { struct fanotify_event *event = NULL; + struct fanotify_fid_event *ffe = NULL; gfp_t gfp = GFP_KERNEL_ACCOUNT; struct inode *id = fanotify_fid_inode(inode, mask, data, data_type); const struct path *path = fsnotify_data_path(data, data_type); @@ -334,14 +348,32 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp); if (!pevent) goto out; + event = &pevent->fae; + event->type = FANOTIFY_EVENT_TYPE_PATH_PERM; pevent->response = 0; pevent->state = FAN_EVENT_INIT; goto init; } - event = kmem_cache_alloc(fanotify_event_cachep, gfp); - if (!event) - goto out; + + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + ffe = kmem_cache_alloc(fanotify_fid_event_cachep, gfp); + if (!ffe) + goto out; + + event = &ffe->fae; + event->type = FANOTIFY_EVENT_TYPE_FID; + } else { + struct fanotify_path_event *pevent; + + pevent = kmem_cache_alloc(fanotify_path_event_cachep, gfp); + if (!pevent) + goto out; + + event = &pevent->fae; + event->type = FANOTIFY_EVENT_TYPE_PATH; + } + init: __maybe_unused /* * Use the victim inode instead of the watching inode as the id for @@ -354,19 +386,23 @@ init: __maybe_unused event->pid = get_pid(task_pid(current)); else event->pid = get_pid(task_tgid(current)); - event->fh.len = 0; - if (id && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { - event->fsid = *fsid; + + if (fanotify_event_has_fid(event)) { + ffe->object_fh.len = 0; + if (fsid) + ffe->fsid = *fsid; if (id) - fanotify_encode_fh(&event->fh, id, gfp); - } else if (path) { - event->fh.type = FILEID_ROOT; - event->path = *path; - path_get(path); - } else { - event->fh.type = FILEID_INVALID; - event->path.mnt = NULL; - event->path.dentry = NULL; + fanotify_encode_fh(&ffe->object_fh, id, gfp); + } else if (fanotify_event_has_path(event)) { + struct path *p = fanotify_event_path(event); + + if (path) { + *p = *path; + path_get(path); + } else { + p->mnt = NULL; + p->dentry = NULL; + } } out: memalloc_unuse_memcg(); @@ -486,7 +522,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, ret = 0; } else if (fanotify_is_perm_event(mask)) { - ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event), + ret = fanotify_get_response(group, FANOTIFY_PERM(event), iter_info); } finish: @@ -505,22 +541,46 @@ static void fanotify_free_group_priv(struct fsnotify_group *group) free_uid(user); } +static void fanotify_free_path_event(struct fanotify_event *event) +{ + path_put(fanotify_event_path(event)); + kmem_cache_free(fanotify_path_event_cachep, FANOTIFY_PE(event)); +} + +static void fanotify_free_perm_event(struct fanotify_event *event) +{ + path_put(fanotify_event_path(event)); + kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PERM(event)); +} + +static void fanotify_free_fid_event(struct fanotify_event *event) +{ + struct fanotify_fid_event *ffe = FANOTIFY_FE(event); + + if (fanotify_fh_has_ext_buf(&ffe->object_fh)) + kfree(fanotify_fh_ext_buf(&ffe->object_fh)); + kmem_cache_free(fanotify_fid_event_cachep, ffe); +} + static void fanotify_free_event(struct fsnotify_event *fsn_event) { struct fanotify_event *event; event = FANOTIFY_E(fsn_event); - if (fanotify_event_has_path(event)) - path_put(&event->path); - else if (fanotify_fh_has_ext_buf(&event->fh)) - kfree(fanotify_fh_ext_buf(&event->fh)); put_pid(event->pid); - if (fanotify_is_perm_event(event->mask)) { - kmem_cache_free(fanotify_perm_event_cachep, - FANOTIFY_PE(fsn_event)); - return; + switch (event->type) { + case FANOTIFY_EVENT_TYPE_PATH: + fanotify_free_path_event(event); + break; + case FANOTIFY_EVENT_TYPE_PATH_PERM: + fanotify_free_perm_event(event); + break; + case FANOTIFY_EVENT_TYPE_FID: + fanotify_free_fid_event(event); + break; + default: + WARN_ON_ONCE(1); } - kmem_cache_free(fanotify_event_cachep, event); } static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index f9da4481613d..3b50ee44a0cd 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -5,7 +5,8 @@ #include extern struct kmem_cache *fanotify_mark_cache; -extern struct kmem_cache *fanotify_event_cachep; +extern struct kmem_cache *fanotify_fid_event_cachep; +extern struct kmem_cache *fanotify_path_event_cachep; extern struct kmem_cache *fanotify_perm_event_cachep; /* Possible states of the permission event */ @@ -52,43 +53,45 @@ static inline void *fanotify_fh_buf(struct fanotify_fh *fh) } /* - * Structure for normal fanotify events. It gets allocated in + * Common structure for fanotify events. Concrete structs are allocated in * fanotify_handle_event() and freed when the information is retrieved by - * userspace + * userspace. The type of event determines how it was allocated, how it will + * be freed and which concrete struct it may be cast to. */ +enum fanotify_event_type { + FANOTIFY_EVENT_TYPE_FID, + FANOTIFY_EVENT_TYPE_PATH, + FANOTIFY_EVENT_TYPE_PATH_PERM, +}; + struct fanotify_event { struct fsnotify_event fse; u32 mask; - /* - * With FAN_REPORT_FID, we do not hold any reference on the - * victim object. Instead we store its NFS file handle and its - * filesystem's fsid as a unique identifier. - */ - __kernel_fsid_t fsid; - struct fanotify_fh fh; - /* - * We hold ref to this path so it may be dereferenced at any - * point during this object's lifetime - */ - struct path path; + enum fanotify_event_type type; struct pid *pid; }; -static inline bool fanotify_event_has_path(struct fanotify_event *event) +struct fanotify_fid_event { + struct fanotify_event fae; + __kernel_fsid_t fsid; + struct fanotify_fh object_fh; +}; + +static inline struct fanotify_fid_event * +FANOTIFY_FE(struct fanotify_event *event) { - return event->fh.type == FILEID_ROOT; + return container_of(event, struct fanotify_fid_event, fae); } static inline bool fanotify_event_has_fid(struct fanotify_event *event) { - return event->fh.type != FILEID_ROOT && - event->fh.type != FILEID_INVALID; + return event->type == FANOTIFY_EVENT_TYPE_FID; } static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event) { - if (fanotify_event_has_fid(event)) - return &event->fsid; + if (event->type == FANOTIFY_EVENT_TYPE_FID) + return &FANOTIFY_FE(event)->fsid; else return NULL; } @@ -96,8 +99,8 @@ static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event) static inline struct fanotify_fh *fanotify_event_object_fh( struct fanotify_event *event) { - if (fanotify_event_has_fid(event)) - return &event->fh; + if (event->type == FANOTIFY_EVENT_TYPE_FID) + return &FANOTIFY_FE(event)->object_fh; else return NULL; } @@ -109,6 +112,17 @@ static inline int fanotify_event_object_fh_len(struct fanotify_event *event) return fh ? fh->len : 0; } +struct fanotify_path_event { + struct fanotify_event fae; + struct path path; +}; + +static inline struct fanotify_path_event * +FANOTIFY_PE(struct fanotify_event *event) +{ + return container_of(event, struct fanotify_path_event, fae); +} + /* * Structure for permission fanotify events. It gets allocated and freed in * fanotify_handle_event() since we wait there for user response. When the @@ -118,15 +132,16 @@ static inline int fanotify_event_object_fh_len(struct fanotify_event *event) */ struct fanotify_perm_event { struct fanotify_event fae; + struct path path; unsigned short response; /* userspace answer to the event */ unsigned short state; /* state of the event */ int fd; /* fd we passed to userspace for this event */ }; static inline struct fanotify_perm_event * -FANOTIFY_PE(struct fsnotify_event *fse) +FANOTIFY_PERM(struct fanotify_event *event) { - return container_of(fse, struct fanotify_perm_event, fae.fse); + return container_of(event, struct fanotify_perm_event, fae); } static inline bool fanotify_is_perm_event(u32 mask) @@ -140,10 +155,18 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) return container_of(fse, struct fanotify_event, fse); } +static inline bool fanotify_event_has_path(struct fanotify_event *event) +{ + return event->type == FANOTIFY_EVENT_TYPE_PATH || + event->type == FANOTIFY_EVENT_TYPE_PATH_PERM; +} + static inline struct path *fanotify_event_path(struct fanotify_event *event) { - if (fanotify_event_has_path(event)) - return &event->path; + if (event->type == FANOTIFY_EVENT_TYPE_PATH) + return &FANOTIFY_PE(event)->path; + else if (event->type == FANOTIFY_EVENT_TYPE_PATH_PERM) + return &FANOTIFY_PERM(event)->path; else return NULL; } diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 0b3b74fa3a27..6cb94a6bc980 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -46,7 +46,8 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops; struct kmem_cache *fanotify_mark_cache __read_mostly; -struct kmem_cache *fanotify_event_cachep __read_mostly; +struct kmem_cache *fanotify_fid_event_cachep __read_mostly; +struct kmem_cache *fanotify_path_event_cachep __read_mostly; struct kmem_cache *fanotify_perm_event_cachep __read_mostly; #define FANOTIFY_EVENT_ALIGN 4 @@ -64,16 +65,16 @@ static int fanotify_event_info_len(struct fanotify_event *event) } /* - * Get an fsnotify notification event if one exists and is small + * Get an fanotify notification event if one exists and is small * enough to fit in "count". Return an error pointer if the count * is not large enough. When permission event is dequeued, its state is * updated accordingly. */ -static struct fsnotify_event *get_one_event(struct fsnotify_group *group, +static struct fanotify_event *get_one_event(struct fsnotify_group *group, size_t count) { size_t event_size = FAN_EVENT_METADATA_LEN; - struct fsnotify_event *fsn_event = NULL; + struct fanotify_event *event = NULL; pr_debug("%s: group=%p count=%zd\n", __func__, group, count); @@ -87,15 +88,15 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, } if (event_size > count) { - fsn_event = ERR_PTR(-EINVAL); + event = ERR_PTR(-EINVAL); goto out; } - fsn_event = fsnotify_remove_first_event(group); - if (fanotify_is_perm_event(FANOTIFY_E(fsn_event)->mask)) - FANOTIFY_PE(fsn_event)->state = FAN_EVENT_REPORTED; + event = FANOTIFY_E(fsnotify_remove_first_event(group)); + if (fanotify_is_perm_event(event->mask)) + FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED; out: spin_unlock(&group->notification_lock); - return fsn_event; + return event; } static int create_fd(struct fsnotify_group *group, struct path *path, @@ -252,19 +253,16 @@ static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) } static ssize_t copy_event_to_user(struct fsnotify_group *group, - struct fsnotify_event *fsn_event, + struct fanotify_event *event, char __user *buf, size_t count) { struct fanotify_event_metadata metadata; - struct fanotify_event *event; - struct path *path; + struct path *path = fanotify_event_path(event); struct file *f = NULL; int ret, fd = FAN_NOFD; - pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event); + pr_debug("%s: group=%p event=%p\n", __func__, group, event); - event = container_of(fsn_event, struct fanotify_event, fse); - path = fanotify_event_path(event); metadata.event_len = FAN_EVENT_METADATA_LEN; metadata.metadata_len = FAN_EVENT_METADATA_LEN; metadata.vers = FANOTIFY_METADATA_VERSION; @@ -293,9 +291,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, goto out_close_fd; if (fanotify_is_perm_event(event->mask)) - FANOTIFY_PE(fsn_event)->fd = fd; + FANOTIFY_PERM(event)->fd = fd; - if (fanotify_event_has_path(event)) { + if (f) { fd_install(fd, f); } else if (fanotify_event_has_fid(event)) { ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN); @@ -332,7 +330,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { struct fsnotify_group *group; - struct fsnotify_event *kevent; + struct fanotify_event *event; char __user *start; int ret; DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -344,13 +342,13 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, add_wait_queue(&group->notification_waitq, &wait); while (1) { - kevent = get_one_event(group, count); - if (IS_ERR(kevent)) { - ret = PTR_ERR(kevent); + event = get_one_event(group, count); + if (IS_ERR(event)) { + ret = PTR_ERR(event); break; } - if (!kevent) { + if (!event) { ret = -EAGAIN; if (file->f_flags & O_NONBLOCK) break; @@ -366,7 +364,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, continue; } - ret = copy_event_to_user(group, kevent, buf, count); + ret = copy_event_to_user(group, event, buf, count); if (unlikely(ret == -EOPENSTALE)) { /* * We cannot report events with stale fd so drop it. @@ -381,17 +379,17 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, * Permission events get queued to wait for response. Other * events can be destroyed now. */ - if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) { - fsnotify_destroy_event(group, kevent); + if (!fanotify_is_perm_event(event->mask)) { + fsnotify_destroy_event(group, &event->fse); } else { if (ret <= 0) { spin_lock(&group->notification_lock); finish_permission_event(group, - FANOTIFY_PE(kevent), FAN_DENY); + FANOTIFY_PERM(event), FAN_DENY); wake_up(&group->fanotify_data.access_waitq); } else { spin_lock(&group->notification_lock); - list_add_tail(&kevent->list, + list_add_tail(&event->fse.list, &group->fanotify_data.access_list); spin_unlock(&group->notification_lock); } @@ -437,8 +435,6 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; - struct fanotify_perm_event *event; - struct fsnotify_event *fsn_event; /* * Stop new events from arriving in the notification queue. since @@ -453,6 +449,8 @@ static int fanotify_release(struct inode *ignored, struct file *file) */ spin_lock(&group->notification_lock); while (!list_empty(&group->fanotify_data.access_list)) { + struct fanotify_perm_event *event; + event = list_first_entry(&group->fanotify_data.access_list, struct fanotify_perm_event, fae.fse.list); list_del_init(&event->fae.fse.list); @@ -466,12 +464,14 @@ static int fanotify_release(struct inode *ignored, struct file *file) * response is consumed and fanotify_get_response() returns. */ while (!fsnotify_notify_queue_is_empty(group)) { - fsn_event = fsnotify_remove_first_event(group); - if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) { + struct fanotify_event *event; + + event = FANOTIFY_E(fsnotify_remove_first_event(group)); + if (!(event->mask & FANOTIFY_PERM_EVENTS)) { spin_unlock(&group->notification_lock); - fsnotify_destroy_event(group, fsn_event); + fsnotify_destroy_event(group, &event->fse); } else { - finish_permission_event(group, FANOTIFY_PE(fsn_event), + finish_permission_event(group, FANOTIFY_PERM(event), FAN_ALLOW); } spin_lock(&group->notification_lock); @@ -1136,7 +1136,10 @@ static int __init fanotify_user_setup(void) fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); - fanotify_event_cachep = KMEM_CACHE(fanotify_event, SLAB_PANIC); + fanotify_fid_event_cachep = KMEM_CACHE(fanotify_fid_event, + SLAB_PANIC); + fanotify_path_event_cachep = KMEM_CACHE(fanotify_path_event, + SLAB_PANIC); if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { fanotify_perm_event_cachep = KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); From 9e2ba2c34f1922ca1e0c7d31b30ace5842c2e7d1 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:19 +0200 Subject: [PATCH 12/17] fanotify: send FAN_DIR_MODIFY event flavor with dir inode and name Dirent events are going to be supported in two flavors: 1. Directory fid info + mask that includes the specific event types (e.g. FAN_CREATE) and an optional FAN_ONDIR flag. 2. Directory fid info + name + mask that includes only FAN_DIR_MODIFY. To request the second event flavor, user needs to set the event type FAN_DIR_MODIFY in the mark mask. The first flavor is supported since kernel v5.1 for groups initialized with flag FAN_REPORT_FID. It is intended to be used for watching directories in "batch mode" - the watcher is notified when directory is changed and re-scans the directory content in response. This event flavor is stored more compactly in the event queue, so it is optimal for workloads with frequent directory changes. The second event flavor is intended to be used for watching large directories, where the cost of re-scan of the directory on every change is considered too high. The watcher getting the event with the directory fid and entry name is expected to call fstatat(2) to query the content of the entry after the change. Legacy inotify events are reported with name and event mask (e.g. "foo", FAN_CREATE | FAN_ONDIR). That can lead users to the conclusion that there is *currently* an entry "foo" that is a sub-directory, when in fact "foo" may be negative or non-dir by the time user gets the event. To make it clear that the current state of the named entry is unknown, when reporting an event with name info, fanotify obfuscates the specific event types (e.g. create,delete,rename) and uses a common event type - FAN_DIR_MODIFY to describe the change. This should make it harder for users to make wrong assumptions and write buggy filesystem monitors. At this point, name info reporting is not yet implemented, so trying to set FAN_DIR_MODIFY in mark mask will return -EINVAL. Link: https://lore.kernel.org/r/20200319151022.31456-12-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 7 ++++--- fs/notify/fsnotify.c | 2 +- include/linux/fsnotify.h | 6 ++++++ include/linux/fsnotify_backend.h | 4 +++- include/uapi/linux/fanotify.h | 1 + 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 39eb71f7c413..74676228f784 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -235,9 +235,9 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, test_mask = event_mask & marks_mask & ~marks_ignored_mask; /* - * dirent modification events (create/delete/move) do not carry the - * child entry name/inode information. Instead, we report FAN_ONDIR - * for mkdir/rmdir so user can differentiate them from creat/unlink. + * For dirent modification events (create/delete/move) that do not carry + * the child entry name information, we report FAN_ONDIR for mkdir/rmdir + * so user can differentiate them from creat/unlink. * * For backward compatibility and consistency, do not report FAN_ONDIR * to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR @@ -463,6 +463,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_MOVED_FROM != FS_MOVED_FROM); BUILD_BUG_ON(FAN_CREATE != FS_CREATE); BUILD_BUG_ON(FAN_DELETE != FS_DELETE); + BUILD_BUG_ON(FAN_DIR_MODIFY != FS_DIR_MODIFY); BUILD_BUG_ON(FAN_DELETE_SELF != FS_DELETE_SELF); BUILD_BUG_ON(FAN_MOVE_SELF != FS_MOVE_SELF); BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 193530f57963..72d332ce8e12 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -383,7 +383,7 @@ static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 860018f3e545..5ab28f6c7d26 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -30,6 +30,12 @@ static inline void fsnotify_name(struct inode *dir, __u32 mask, const struct qstr *name, u32 cookie) { fsnotify(dir, mask, child, FSNOTIFY_EVENT_INODE, name, cookie); + /* + * Send another flavor of the event without child inode data and + * without the specific event type (e.g. FS_CREATE|FS_IS_DIR). + * The name is relative to the dir inode the event is reported to. + */ + fsnotify(dir, FS_DIR_MODIFY, dir, FSNOTIFY_EVENT_INODE, name, 0); } static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c72cbea20ef7..f0c506405b54 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -47,6 +47,7 @@ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ +#define FS_DIR_MODIFY 0x00080000 /* Directory entry was modified */ #define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ /* This inode cares about things that happen to its children. Always set for @@ -66,7 +67,8 @@ * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event * when a directory entry inside a child subdir changes. */ -#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) +#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | \ + FS_DIR_MODIFY) #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 2a1844edda47..615fa2c87179 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -24,6 +24,7 @@ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +#define FAN_DIR_MODIFY 0x00080000 /* Directory entry was modified */ #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ From d766b553615ce67f33848ec4f6672b3158198a5c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:20 +0200 Subject: [PATCH 13/17] fanotify: prepare to report both parent and child fid's For some events, we are going to report both child and parent fid's, so pass fsid and file handle as arguments to copy_fid_to_user(), which is going to be called with parent and child file handles. Link: https://lore.kernel.org/r/20200319151022.31456-13-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 6cb94a6bc980..3a11f6a0339f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -52,6 +52,13 @@ struct kmem_cache *fanotify_perm_event_cachep __read_mostly; #define FANOTIFY_EVENT_ALIGN 4 +static int fanotify_fid_info_len(int fh_len) +{ + return roundup(sizeof(struct fanotify_event_info_fid) + + sizeof(struct file_handle) + fh_len, + FANOTIFY_EVENT_ALIGN); +} + static int fanotify_event_info_len(struct fanotify_event *event) { int fh_len = fanotify_event_object_fh_len(event); @@ -59,9 +66,7 @@ static int fanotify_event_info_len(struct fanotify_event *event) if (!fh_len) return 0; - return roundup(sizeof(struct fanotify_event_info_fid) + - sizeof(struct file_handle) + fh_len, - FANOTIFY_EVENT_ALIGN); + return fanotify_fid_info_len(fh_len); } /* @@ -199,14 +204,14 @@ static int process_access_response(struct fsnotify_group *group, return -ENOENT; } -static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) +static int copy_fid_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, + char __user *buf) { struct fanotify_event_info_fid info = { }; struct file_handle handle = { }; unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh_buf; - struct fanotify_fh *fh = fanotify_event_object_fh(event); size_t fh_len = fh->len; - size_t len = fanotify_event_info_len(event); + size_t len = fanotify_fid_info_len(fh_len); if (!len) return 0; @@ -217,7 +222,7 @@ static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) /* Copy event info fid header followed by vaiable sized file handle */ info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID; info.hdr.len = len; - info.fsid = *fanotify_event_fsid(event); + info.fsid = *fsid; if (copy_to_user(buf, &info, sizeof(info))) return -EFAULT; @@ -296,7 +301,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (f) { fd_install(fd, f); } else if (fanotify_event_has_fid(event)) { - ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN); + ret = copy_fid_to_user(fanotify_event_fsid(event), + fanotify_event_object_fh(event), + buf + FAN_EVENT_METADATA_LEN); if (ret < 0) return ret; } From 01affd5471dcab04c6cb0c2acaf132a20488f86f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 Mar 2020 19:35:07 +0100 Subject: [PATCH 14/17] fanotify: Drop fanotify_event_has_fid() When some events have directory id and some object id, fanotify_event_has_fid() becomes mostly useless and confusing because we usually need to know which type of file handle the event has. So just drop the function and use fanotify_event_object_fh() instead. Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 2 +- fs/notify/fanotify/fanotify.h | 5 ----- fs/notify/fanotify/fanotify_user.c | 4 ++-- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 74676228f784..ef39664e389c 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -387,7 +387,7 @@ init: __maybe_unused else event->pid = get_pid(task_tgid(current)); - if (fanotify_event_has_fid(event)) { + if (fanotify_event_object_fh(event)) { ffe->object_fh.len = 0; if (fsid) ffe->fsid = *fsid; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 3b50ee44a0cd..eecf4be3bfd1 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -83,11 +83,6 @@ FANOTIFY_FE(struct fanotify_event *event) return container_of(event, struct fanotify_fid_event, fae); } -static inline bool fanotify_event_has_fid(struct fanotify_event *event) -{ - return event->type == FANOTIFY_EVENT_TYPE_FID; -} - static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event) { if (event->type == FANOTIFY_EVENT_TYPE_FID) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 3a11f6a0339f..b93585406aad 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -275,7 +275,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; metadata.pid = pid_vnr(event->pid); - if (fanotify_event_has_fid(event)) { + if (fanotify_event_object_fh(event)) { metadata.event_len += fanotify_event_info_len(event); } else if (path && path->mnt && path->dentry) { fd = create_fd(group, path, &f); @@ -300,7 +300,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (f) { fd_install(fd, f); - } else if (fanotify_event_has_fid(event)) { + } else if (fanotify_event_object_fh(event)) { ret = copy_fid_to_user(fanotify_event_fsid(event), fanotify_event_object_fh(event), buf + FAN_EVENT_METADATA_LEN); From cacfb956d46edc5d7a1165161bfc6ca3de9519d9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:21 +0200 Subject: [PATCH 15/17] fanotify: record name info for FAN_DIR_MODIFY event For FAN_DIR_MODIFY event, allocate a variable size event struct to store the dir entry name along side the directory file handle. At this point, name info reporting is not yet implemented, so trying to set FAN_DIR_MODIFY in mark mask will return -EINVAL. Link: https://lore.kernel.org/r/20200319151022.31456-14-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 76 ++++++++++++++++++++++++++---- fs/notify/fanotify/fanotify.h | 40 +++++++++++++++- fs/notify/fanotify/fanotify_user.c | 4 +- 3 files changed, 108 insertions(+), 12 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index ef39664e389c..599654564b2a 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -53,6 +53,23 @@ static bool fanotify_fid_event_equal(struct fanotify_fid_event *ffe1, fanotify_fh_equal(&ffe1->object_fh, &ffe2->object_fh); } +static bool fanotify_name_event_equal(struct fanotify_name_event *fne1, + struct fanotify_name_event *fne2) +{ + /* + * Do not merge name events without dir fh. + * FAN_DIR_MODIFY does not encode object fh, so it may be empty. + */ + if (!fne1->dir_fh.len) + return false; + + if (fne1->name_len != fne2->name_len || + !fanotify_fh_equal(&fne1->dir_fh, &fne2->dir_fh)) + return false; + + return !memcmp(fne1->name, fne2->name, fne1->name_len); +} + static bool should_merge(struct fsnotify_event *old_fsn, struct fsnotify_event *new_fsn) { @@ -84,6 +101,9 @@ static bool should_merge(struct fsnotify_event *old_fsn, return fanotify_fid_event_equal(FANOTIFY_FE(old), FANOTIFY_FE(new)); + case FANOTIFY_EVENT_TYPE_FID_NAME: + return fanotify_name_event_equal(FANOTIFY_NE(old), + FANOTIFY_NE(new)); default: WARN_ON_ONCE(1); } @@ -262,6 +282,9 @@ static void fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, void *buf = fh->buf; int err; + if (!inode) + goto out; + dwords = 0; err = -ENOENT; type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL); @@ -295,6 +318,7 @@ out_err: type, bytes, err); kfree(ext_buf); *fanotify_fh_ext_buf_ptr(fh) = NULL; +out: /* Report the event without a file identifier on encode error */ fh->type = FILEID_INVALID; fh->len = 0; @@ -320,10 +344,12 @@ static struct inode *fanotify_fid_inode(struct inode *to_tell, u32 event_mask, struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, + const struct qstr *file_name, __kernel_fsid_t *fsid) { struct fanotify_event *event = NULL; struct fanotify_fid_event *ffe = NULL; + struct fanotify_name_event *fne = NULL; gfp_t gfp = GFP_KERNEL_ACCOUNT; struct inode *id = fanotify_fid_inode(inode, mask, data, data_type); const struct path *path = fsnotify_data_path(data, data_type); @@ -356,6 +382,23 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, goto init; } + /* + * For FAN_DIR_MODIFY event, we report the fid of the directory and + * the name of the modified entry. + * Allocate an fanotify_name_event struct and copy the name. + */ + if (mask & FAN_DIR_MODIFY && !(WARN_ON_ONCE(!file_name))) { + fne = kmalloc(sizeof(*fne) + file_name->len + 1, gfp); + if (!fne) + goto out; + + event = &fne->fae; + event->type = FANOTIFY_EVENT_TYPE_FID_NAME; + fne->name_len = file_name->len; + strcpy(fne->name, file_name->name); + goto init; + } + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { ffe = kmem_cache_alloc(fanotify_fid_event_cachep, gfp); if (!ffe) @@ -374,7 +417,7 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, event->type = FANOTIFY_EVENT_TYPE_PATH; } -init: __maybe_unused +init: /* * Use the victim inode instead of the watching inode as the id for * event queue, so event reported on parent is merged with event @@ -387,13 +430,16 @@ init: __maybe_unused else event->pid = get_pid(task_tgid(current)); - if (fanotify_event_object_fh(event)) { - ffe->object_fh.len = 0; - if (fsid) - ffe->fsid = *fsid; - if (id) - fanotify_encode_fh(&ffe->object_fh, id, gfp); - } else if (fanotify_event_has_path(event)) { + if (fsid && fanotify_event_fsid(event)) + *fanotify_event_fsid(event) = *fsid; + + if (fanotify_event_object_fh(event)) + fanotify_encode_fh(fanotify_event_object_fh(event), id, gfp); + + if (fanotify_event_dir_fh(event)) + fanotify_encode_fh(fanotify_event_dir_fh(event), id, gfp); + + if (fanotify_event_has_path(event)) { struct path *p = fanotify_event_path(event); if (path) { @@ -501,7 +547,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, } event = fanotify_alloc_event(group, inode, mask, data, data_type, - &fsid); + file_name, &fsid); ret = -ENOMEM; if (unlikely(!event)) { /* @@ -563,6 +609,15 @@ static void fanotify_free_fid_event(struct fanotify_event *event) kmem_cache_free(fanotify_fid_event_cachep, ffe); } +static void fanotify_free_name_event(struct fanotify_event *event) +{ + struct fanotify_name_event *fne = FANOTIFY_NE(event); + + if (fanotify_fh_has_ext_buf(&fne->dir_fh)) + kfree(fanotify_fh_ext_buf(&fne->dir_fh)); + kfree(fne); +} + static void fanotify_free_event(struct fsnotify_event *fsn_event) { struct fanotify_event *event; @@ -579,6 +634,9 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) case FANOTIFY_EVENT_TYPE_FID: fanotify_free_fid_event(event); break; + case FANOTIFY_EVENT_TYPE_FID_NAME: + fanotify_free_name_event(event); + break; default: WARN_ON_ONCE(1); } diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index eecf4be3bfd1..35bfbf4a7aac 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -59,7 +59,8 @@ static inline void *fanotify_fh_buf(struct fanotify_fh *fh) * be freed and which concrete struct it may be cast to. */ enum fanotify_event_type { - FANOTIFY_EVENT_TYPE_FID, + FANOTIFY_EVENT_TYPE_FID, /* fixed length */ + FANOTIFY_EVENT_TYPE_FID_NAME, /* variable length */ FANOTIFY_EVENT_TYPE_PATH, FANOTIFY_EVENT_TYPE_PATH_PERM, }; @@ -83,10 +84,26 @@ FANOTIFY_FE(struct fanotify_event *event) return container_of(event, struct fanotify_fid_event, fae); } +struct fanotify_name_event { + struct fanotify_event fae; + __kernel_fsid_t fsid; + struct fanotify_fh dir_fh; + u8 name_len; + char name[0]; +}; + +static inline struct fanotify_name_event * +FANOTIFY_NE(struct fanotify_event *event) +{ + return container_of(event, struct fanotify_name_event, fae); +} + static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event) { if (event->type == FANOTIFY_EVENT_TYPE_FID) return &FANOTIFY_FE(event)->fsid; + else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME) + return &FANOTIFY_NE(event)->fsid; else return NULL; } @@ -100,6 +117,15 @@ static inline struct fanotify_fh *fanotify_event_object_fh( return NULL; } +static inline struct fanotify_fh *fanotify_event_dir_fh( + struct fanotify_event *event) +{ + if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME) + return &FANOTIFY_NE(event)->dir_fh; + else + return NULL; +} + static inline int fanotify_event_object_fh_len(struct fanotify_event *event) { struct fanotify_fh *fh = fanotify_event_object_fh(event); @@ -107,6 +133,17 @@ static inline int fanotify_event_object_fh_len(struct fanotify_event *event) return fh ? fh->len : 0; } +static inline bool fanotify_event_has_name(struct fanotify_event *event) +{ + return event->type == FANOTIFY_EVENT_TYPE_FID_NAME; +} + +static inline int fanotify_event_name_len(struct fanotify_event *event) +{ + return fanotify_event_has_name(event) ? + FANOTIFY_NE(event)->name_len : 0; +} + struct fanotify_path_event { struct fanotify_event fae; struct path path; @@ -169,4 +206,5 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event) struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, + const struct qstr *file_name, __kernel_fsid_t *fsid); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index b93585406aad..a9d287a56098 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -210,7 +210,7 @@ static int copy_fid_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, struct fanotify_event_info_fid info = { }; struct file_handle handle = { }; unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh_buf; - size_t fh_len = fh->len; + size_t fh_len = fh ? fh->len : 0; size_t len = fanotify_fid_info_len(fh_len); if (!len) @@ -828,7 +828,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->memcg = get_mem_cgroup_from_mm(current->mm); oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL, - FSNOTIFY_EVENT_NONE, NULL); + FSNOTIFY_EVENT_NONE, NULL, NULL); if (unlikely(!oevent)) { fd = -ENOMEM; goto out_destroy_group; From 44d705b0370b1d581f46ff23e5d33e8b5ff8ec58 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:22 +0200 Subject: [PATCH 16/17] fanotify: report name info for FAN_DIR_MODIFY event Report event FAN_DIR_MODIFY with name in a variable length record similar to how fid's are reported. With name info reporting implemented, setting FAN_DIR_MODIFY in mark mask is now allowed. When events are reported with name, the reported fid identifies the directory and the name follows the fid. The info record type for this event info is FAN_EVENT_INFO_TYPE_DFID_NAME. For now, all reported events have at most one info record which is either FAN_EVENT_INFO_TYPE_FID or FAN_EVENT_INFO_TYPE_DFID_NAME (for FAN_DIR_MODIFY). Later on, events "on child" will report both records. There are several ways that an application can use this information: 1. When watching a single directory, the name is always relative to the watched directory, so application need to fstatat(2) the name relative to the watched directory. 2. When watching a set of directories, the application could keep a map of dirfd for all watched directories and hash the map by fid obtained with name_to_handle_at(2). When getting a name event, the fid in the event info could be used to lookup the base dirfd in the map and then call fstatat(2) with that dirfd. 3. When watching a filesystem (FAN_MARK_FILESYSTEM) or a large set of directories, the application could use open_by_handle_at(2) with the fid in event info to obtain dirfd for the directory where event happened and call fstatat(2) with this dirfd. The last option scales better for a large number of watched directories. The first two options may be available in the future also for non privileged fanotify watchers, because open_by_handle_at(2) requires the CAP_DAC_READ_SEARCH capability. Link: https://lore.kernel.org/r/20200319151022.31456-15-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 2 +- fs/notify/fanotify/fanotify_user.c | 117 ++++++++++++++++++++++------- include/linux/fanotify.h | 3 +- include/uapi/linux/fanotify.h | 8 +- 4 files changed, 100 insertions(+), 30 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 599654564b2a..4c1a4eb597d5 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -520,7 +520,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20); mask = fanotify_group_event_mask(group, iter_info, mask, data, data_type); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index a9d287a56098..42cb794c62ac 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -51,22 +51,35 @@ struct kmem_cache *fanotify_path_event_cachep __read_mostly; struct kmem_cache *fanotify_perm_event_cachep __read_mostly; #define FANOTIFY_EVENT_ALIGN 4 +#define FANOTIFY_INFO_HDR_LEN \ + (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle)) -static int fanotify_fid_info_len(int fh_len) +static int fanotify_fid_info_len(int fh_len, int name_len) { - return roundup(sizeof(struct fanotify_event_info_fid) + - sizeof(struct file_handle) + fh_len, - FANOTIFY_EVENT_ALIGN); + int info_len = fh_len; + + if (name_len) + info_len += name_len + 1; + + return roundup(FANOTIFY_INFO_HDR_LEN + info_len, FANOTIFY_EVENT_ALIGN); } static int fanotify_event_info_len(struct fanotify_event *event) { + int info_len = 0; int fh_len = fanotify_event_object_fh_len(event); - if (!fh_len) - return 0; + if (fh_len) + info_len += fanotify_fid_info_len(fh_len, 0); - return fanotify_fid_info_len(fh_len); + if (fanotify_event_name_len(event)) { + struct fanotify_name_event *fne = FANOTIFY_NE(event); + + info_len += fanotify_fid_info_len(fne->dir_fh.len, + fne->name_len); + } + + return info_len; } /* @@ -204,23 +217,32 @@ static int process_access_response(struct fsnotify_group *group, return -ENOENT; } -static int copy_fid_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, - char __user *buf) +static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, + const char *name, size_t name_len, + char __user *buf, size_t count) { struct fanotify_event_info_fid info = { }; struct file_handle handle = { }; unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh_buf; size_t fh_len = fh ? fh->len : 0; - size_t len = fanotify_fid_info_len(fh_len); + size_t info_len = fanotify_fid_info_len(fh_len, name_len); + size_t len = info_len; - if (!len) + pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n", + __func__, fh_len, name_len, info_len, count); + + if (!fh_len || (name && !name_len)) return 0; - if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len)) + if (WARN_ON_ONCE(len < sizeof(info) || len > count)) return -EFAULT; - /* Copy event info fid header followed by vaiable sized file handle */ - info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID; + /* + * Copy event info fid header followed by variable sized file handle + * and optionally followed by variable sized filename. + */ + info.hdr.info_type = name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME : + FAN_EVENT_INFO_TYPE_FID; info.hdr.len = len; info.fsid = *fsid; if (copy_to_user(buf, &info, sizeof(info))) @@ -228,6 +250,9 @@ static int copy_fid_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, buf += sizeof(info); len -= sizeof(info); + if (WARN_ON_ONCE(len < sizeof(handle))) + return -EFAULT; + handle.handle_type = fh->type; handle.handle_bytes = fh_len; if (copy_to_user(buf, &handle, sizeof(handle))) @@ -235,9 +260,12 @@ static int copy_fid_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, buf += sizeof(handle); len -= sizeof(handle); + if (WARN_ON_ONCE(len < fh_len)) + return -EFAULT; + /* - * For an inline fh, copy through stack to exclude the copy from - * usercopy hardening protections. + * For an inline fh and inline file name, copy through stack to exclude + * the copy from usercopy hardening protections. */ fh_buf = fanotify_fh_buf(fh); if (fh_len <= FANOTIFY_INLINE_FH_LEN) { @@ -247,14 +275,28 @@ static int copy_fid_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, if (copy_to_user(buf, fh_buf, fh_len)) return -EFAULT; - /* Pad with 0's */ buf += fh_len; len -= fh_len; + + if (name_len) { + /* Copy the filename with terminating null */ + name_len++; + if (WARN_ON_ONCE(len < name_len)) + return -EFAULT; + + if (copy_to_user(buf, name, name_len)) + return -EFAULT; + + buf += name_len; + len -= name_len; + } + + /* Pad with 0's */ WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); if (len > 0 && clear_user(buf, len)) return -EFAULT; - return 0; + return info_len; } static ssize_t copy_event_to_user(struct fsnotify_group *group, @@ -268,16 +310,15 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - metadata.event_len = FAN_EVENT_METADATA_LEN; + metadata.event_len = FAN_EVENT_METADATA_LEN + + fanotify_event_info_len(event); metadata.metadata_len = FAN_EVENT_METADATA_LEN; metadata.vers = FANOTIFY_METADATA_VERSION; metadata.reserved = 0; metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; metadata.pid = pid_vnr(event->pid); - if (fanotify_event_object_fh(event)) { - metadata.event_len += fanotify_event_info_len(event); - } else if (path && path->mnt && path->dentry) { + if (path && path->mnt && path->dentry) { fd = create_fd(group, path, &f); if (fd < 0) return fd; @@ -295,17 +336,39 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN)) goto out_close_fd; + buf += FAN_EVENT_METADATA_LEN; + count -= FAN_EVENT_METADATA_LEN; + if (fanotify_is_perm_event(event->mask)) FANOTIFY_PERM(event)->fd = fd; - if (f) { + if (f) fd_install(fd, f); - } else if (fanotify_event_object_fh(event)) { - ret = copy_fid_to_user(fanotify_event_fsid(event), - fanotify_event_object_fh(event), - buf + FAN_EVENT_METADATA_LEN); + + /* Event info records order is: dir fid + name, child fid */ + if (fanotify_event_name_len(event)) { + struct fanotify_name_event *fne = FANOTIFY_NE(event); + + ret = copy_info_to_user(fanotify_event_fsid(event), + fanotify_event_dir_fh(event), + fne->name, fne->name_len, + buf, count); if (ret < 0) return ret; + + buf += ret; + count -= ret; + } + + if (fanotify_event_object_fh_len(event)) { + ret = copy_info_to_user(fanotify_event_fsid(event), + fanotify_event_object_fh(event), + NULL, 0, buf, count); + if (ret < 0) + return ret; + + buf += ret; + count -= ret; } return metadata.event_len; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index b79fa9bb7359..3049a6c06d9e 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -47,7 +47,8 @@ * Directory entry modification events - reported only to directory * where entry is modified and not to a watching parent. */ -#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE) +#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \ + FAN_DIR_MODIFY) /* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */ #define FANOTIFY_INODE_EVENTS (FANOTIFY_DIRENT_EVENTS | \ diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 615fa2c87179..a88c7c6d0692 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -117,6 +117,7 @@ struct fanotify_event_metadata { }; #define FAN_EVENT_INFO_TYPE_FID 1 +#define FAN_EVENT_INFO_TYPE_DFID_NAME 2 /* Variable length info record following event metadata */ struct fanotify_event_info_header { @@ -125,7 +126,12 @@ struct fanotify_event_info_header { __u16 len; }; -/* Unique file identifier info record */ +/* + * Unique file identifier info record. This is used both for + * FAN_EVENT_INFO_TYPE_FID records and for FAN_EVENT_INFO_TYPE_DFID_NAME + * records. For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null + * terminated name immediately after the file handle. + */ struct fanotify_event_info_fid { struct fanotify_event_info_header hdr; __kernel_fsid_t fsid; From 6def1a1d2d58eda5834fe2e2ace4560f9cdd7de1 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 27 Mar 2020 10:10:30 -0700 Subject: [PATCH 17/17] fanotify: Fix the checks in fanotify_fsid_equal Clang warns: fs/notify/fanotify/fanotify.c:28:23: warning: self-comparison always evaluates to true [-Wtautological-compare] return fsid1->val[0] == fsid1->val[0] && fsid2->val[1] == fsid2->val[1]; ^ fs/notify/fanotify/fanotify.c:28:57: warning: self-comparison always evaluates to true [-Wtautological-compare] return fsid1->val[0] == fsid1->val[0] && fsid2->val[1] == fsid2->val[1]; ^ 2 warnings generated. The intention was clearly to compare val[0] and val[1] in the two different fsid structs. Fix it otherwise this function always returns true. Fixes: afc894c784c8 ("fanotify: Store fanotify handles differently") Link: https://github.com/ClangBuiltLinux/linux/issues/952 Link: https://lore.kernel.org/r/20200327171030.30625-1-natechancellor@gmail.com Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 4c1a4eb597d5..5435a40f82be 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -25,7 +25,7 @@ static bool fanotify_path_equal(struct path *p1, struct path *p2) static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1, __kernel_fsid_t *fsid2) { - return fsid1->val[0] == fsid1->val[0] && fsid2->val[1] == fsid2->val[1]; + return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1]; } static bool fanotify_fh_equal(struct fanotify_fh *fh1,