From d7a15f8d0777955986a2ab00ab181795cab14b01 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers3@gmail.com>
Date: Sun, 16 Mar 2014 14:24:08 -0500
Subject: [PATCH 1/4] vfs: atomic f_pos access in llseek()

Commit 9c225f2655e36a4 ("vfs: atomic f_pos accesses as per POSIX") changed
several system calls to use fdget_pos() instead of fdget(), but missed
sys_llseek().  Fix it.

Signed-off-by: Eric Biggers <ebiggers3@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/read_write.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 54e19b9392dc..28cc9c810744 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -307,7 +307,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 		unsigned int, whence)
 {
 	int retval;
-	struct fd f = fdget(fd);
+	struct fd f = fdget_pos(fd);
 	loff_t offset;
 
 	if (!f.file)
@@ -327,7 +327,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 			retval = 0;
 	}
 out_putf:
-	fdput(f);
+	fdput_pos(f);
 	return retval;
 }
 #endif

From 99aea68134f3c2a27b4d463c91cfa298c3efaccf Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers3@gmail.com>
Date: Sun, 16 Mar 2014 15:47:48 -0500
Subject: [PATCH 2/4] vfs: Don't let __fdget_pos() get FMODE_PATH files

Commit bd2a31d522344 ("get rid of fget_light()") introduced the
__fdget_pos() function, which returns the resulting file pointer and
fdput flags combined in an 'unsigned long'.  However, it also changed the
behavior to return files with FMODE_PATH set, which shouldn't happen
because read(), write(), lseek(), etc. aren't allowed on such files.
This commit restores the old behavior.

This regression actually had no effect on read() and write() since
FMODE_READ and FMODE_WRITE are not set on file descriptors opened with
O_PATH, but it did cause lseek() on a file descriptor opened with O_PATH
to fail with ESPIPE rather than EBADF.

Signed-off-by: Eric Biggers <ebiggers3@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 60a45e9f5323..eb56a13dab3e 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -713,27 +713,16 @@ unsigned long __fdget_raw(unsigned int fd)
 
 unsigned long __fdget_pos(unsigned int fd)
 {
-	struct files_struct *files = current->files;
-	struct file *file;
-	unsigned long v;
+	unsigned long v = __fdget(fd);
+	struct file *file = (struct file *)(v & ~3);
 
-	if (atomic_read(&files->count) == 1) {
-		file = __fcheck_files(files, fd);
-		v = 0;
-	} else {
-		file = __fget(fd, 0);
-		v = FDPUT_FPUT;
-	}
-	if (!file)
-		return 0;
-
-	if (file->f_mode & FMODE_ATOMIC_POS) {
+	if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
 		if (file_count(file) > 1) {
 			v |= FDPUT_POS_UNLOCK;
 			mutex_lock(&file->f_pos_lock);
 		}
 	}
-	return v | (unsigned long)file;
+	return v;
 }
 
 /*

From e825196d48d2b89a6ec3a8eff280098d2a78207e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 23 Mar 2014 00:28:40 -0400
Subject: [PATCH 3/4] make prepend_name() work correctly when called with
 negative *buflen

In all callchains leading to prepend_name(), the value left in *buflen
is eventually discarded unused if prepend_name() has returned a negative.
So we are free to do what prepend() does, and subtract from *buflen
*before* checking for underflow (which turns into checking the sign
of subtraction result, of course).

Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 265e0ce9769c..ca02c13a84aa 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2833,9 +2833,9 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
 	u32 dlen = ACCESS_ONCE(name->len);
 	char *p;
 
-	if (*buflen < dlen + 1)
-		return -ENAMETOOLONG;
 	*buflen -= dlen + 1;
+	if (*buflen < 0)
+		return -ENAMETOOLONG;
 	p = *buffer -= dlen + 1;
 	*p++ = '/';
 	while (dlen--) {

From b37199e626b31e1175fb06764c5d1d687723aac2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 20 Mar 2014 15:18:22 -0400
Subject: [PATCH 4/4] rcuwalk: recheck mount_lock after mountpoint crossing
 attempts

We can get false negative from __lookup_mnt() if an unrelated vfsmount
gets moved.  In that case legitimize_mnt() is guaranteed to fail,
and we will fall back to non-RCU walk... unless we end up running
into a hard error on a filesystem object we wouldn't have reached
if not for that false negative.  IOW, delaying that check until
the end of pathname resolution is wrong - we should recheck right
after we attempt to cross the mountpoint.  We don't need to recheck
unless we see d_mountpoint() being true - in that case even if
we have just raced with mount/umount, we can simply go on as if
we'd come at the moment when the sucker wasn't a mountpoint; if we
run into a hard error as the result, it was a legitimate outcome.
__lookup_mnt() returning NULL is different in that respect, since
it might've happened due to operation on completely unrelated
mountpoint.

Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 2f730ef9b4b3..4b491b431990 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1109,7 +1109,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
 			return false;
 
 		if (!d_mountpoint(path->dentry))
-			break;
+			return true;
 
 		mounted = __lookup_mnt(path->mnt, path->dentry);
 		if (!mounted)
@@ -1125,20 +1125,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
 		 */
 		*inode = path->dentry->d_inode;
 	}
-	return true;
-}
-
-static void follow_mount_rcu(struct nameidata *nd)
-{
-	while (d_mountpoint(nd->path.dentry)) {
-		struct mount *mounted;
-		mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
-		if (!mounted)
-			break;
-		nd->path.mnt = &mounted->mnt;
-		nd->path.dentry = mounted->mnt.mnt_root;
-		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
-	}
+	return read_seqretry(&mount_lock, nd->m_seq);
 }
 
 static int follow_dotdot_rcu(struct nameidata *nd)
@@ -1166,7 +1153,17 @@ static int follow_dotdot_rcu(struct nameidata *nd)
 			break;
 		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
 	}
-	follow_mount_rcu(nd);
+	while (d_mountpoint(nd->path.dentry)) {
+		struct mount *mounted;
+		mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
+		if (!mounted)
+			break;
+		nd->path.mnt = &mounted->mnt;
+		nd->path.dentry = mounted->mnt.mnt_root;
+		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
+		if (!read_seqretry(&mount_lock, nd->m_seq))
+			goto failed;
+	}
 	nd->inode = nd->path.dentry->d_inode;
 	return 0;