From d7a15f8d0777955986a2ab00ab181795cab14b01 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 16 Mar 2014 14:24:08 -0500 Subject: [PATCH 1/4] vfs: atomic f_pos access in llseek() Commit 9c225f2655e36a4 ("vfs: atomic f_pos accesses as per POSIX") changed several system calls to use fdget_pos() instead of fdget(), but missed sys_llseek(). Fix it. Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- fs/read_write.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 54e19b9392dc..28cc9c810744 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -307,7 +307,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, unsigned int, whence) { int retval; - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); loff_t offset; if (!f.file) @@ -327,7 +327,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, retval = 0; } out_putf: - fdput(f); + fdput_pos(f); return retval; } #endif From 99aea68134f3c2a27b4d463c91cfa298c3efaccf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 16 Mar 2014 15:47:48 -0500 Subject: [PATCH 2/4] vfs: Don't let __fdget_pos() get FMODE_PATH files Commit bd2a31d522344 ("get rid of fget_light()") introduced the __fdget_pos() function, which returns the resulting file pointer and fdput flags combined in an 'unsigned long'. However, it also changed the behavior to return files with FMODE_PATH set, which shouldn't happen because read(), write(), lseek(), etc. aren't allowed on such files. This commit restores the old behavior. This regression actually had no effect on read() and write() since FMODE_READ and FMODE_WRITE are not set on file descriptors opened with O_PATH, but it did cause lseek() on a file descriptor opened with O_PATH to fail with ESPIPE rather than EBADF. Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- fs/file.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/fs/file.c b/fs/file.c index 60a45e9f5323..eb56a13dab3e 100644 --- a/fs/file.c +++ b/fs/file.c @@ -713,27 +713,16 @@ unsigned long __fdget_raw(unsigned int fd) unsigned long __fdget_pos(unsigned int fd) { - struct files_struct *files = current->files; - struct file *file; - unsigned long v; + unsigned long v = __fdget(fd); + struct file *file = (struct file *)(v & ~3); - if (atomic_read(&files->count) == 1) { - file = __fcheck_files(files, fd); - v = 0; - } else { - file = __fget(fd, 0); - v = FDPUT_FPUT; - } - if (!file) - return 0; - - if (file->f_mode & FMODE_ATOMIC_POS) { + if (file && (file->f_mode & FMODE_ATOMIC_POS)) { if (file_count(file) > 1) { v |= FDPUT_POS_UNLOCK; mutex_lock(&file->f_pos_lock); } } - return v | (unsigned long)file; + return v; } /* From e825196d48d2b89a6ec3a8eff280098d2a78207e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 23 Mar 2014 00:28:40 -0400 Subject: [PATCH 3/4] make prepend_name() work correctly when called with negative *buflen In all callchains leading to prepend_name(), the value left in *buflen is eventually discarded unused if prepend_name() has returned a negative. So we are free to do what prepend() does, and subtract from *buflen *before* checking for underflow (which turns into checking the sign of subtraction result, of course). Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/dcache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 265e0ce9769c..ca02c13a84aa 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2833,9 +2833,9 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) u32 dlen = ACCESS_ONCE(name->len); char *p; - if (*buflen < dlen + 1) - return -ENAMETOOLONG; *buflen -= dlen + 1; + if (*buflen < 0) + return -ENAMETOOLONG; p = *buffer -= dlen + 1; *p++ = '/'; while (dlen--) { From b37199e626b31e1175fb06764c5d1d687723aac2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Mar 2014 15:18:22 -0400 Subject: [PATCH 4/4] rcuwalk: recheck mount_lock after mountpoint crossing attempts We can get false negative from __lookup_mnt() if an unrelated vfsmount gets moved. In that case legitimize_mnt() is guaranteed to fail, and we will fall back to non-RCU walk... unless we end up running into a hard error on a filesystem object we wouldn't have reached if not for that false negative. IOW, delaying that check until the end of pathname resolution is wrong - we should recheck right after we attempt to cross the mountpoint. We don't need to recheck unless we see d_mountpoint() being true - in that case even if we have just raced with mount/umount, we can simply go on as if we'd come at the moment when the sucker wasn't a mountpoint; if we run into a hard error as the result, it was a legitimate outcome. __lookup_mnt() returning NULL is different in that respect, since it might've happened due to operation on completely unrelated mountpoint. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/namei.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 2f730ef9b4b3..4b491b431990 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1109,7 +1109,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, return false; if (!d_mountpoint(path->dentry)) - break; + return true; mounted = __lookup_mnt(path->mnt, path->dentry); if (!mounted) @@ -1125,20 +1125,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, */ *inode = path->dentry->d_inode; } - return true; -} - -static void follow_mount_rcu(struct nameidata *nd) -{ - while (d_mountpoint(nd->path.dentry)) { - struct mount *mounted; - mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); - if (!mounted) - break; - nd->path.mnt = &mounted->mnt; - nd->path.dentry = mounted->mnt.mnt_root; - nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); - } + return read_seqretry(&mount_lock, nd->m_seq); } static int follow_dotdot_rcu(struct nameidata *nd) @@ -1166,7 +1153,17 @@ static int follow_dotdot_rcu(struct nameidata *nd) break; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } - follow_mount_rcu(nd); + while (d_mountpoint(nd->path.dentry)) { + struct mount *mounted; + mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); + if (!mounted) + break; + nd->path.mnt = &mounted->mnt; + nd->path.dentry = mounted->mnt.mnt_root; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); + if (!read_seqretry(&mount_lock, nd->m_seq)) + goto failed; + } nd->inode = nd->path.dentry->d_inode; return 0;