From 9f79b78ef74436c7507bac6bfb7b8b989263bccb Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 21 May 2016 21:59:07 -0700
Subject: [PATCH 1/2] Convert filldir[64]() from __put_user() to
 unsafe_put_user()

We really should avoid the "__{get,put}_user()" functions entirely,
because they can easily be mis-used and the original intent of being
used for simple direct user accesses no longer holds in a post-SMAP/PAN
world.

Manually optimizing away the user access range check makes no sense any
more, when the range check is generally much cheaper than the "enable
user accesses" code that the __{get,put}_user() functions still need.

So instead of __put_user(), use the unsafe_put_user() interface with
user_access_{begin,end}() that really does generate better code these
days, and which is generally a nicer interface.  Under some loads, the
multiple user writes that filldir() does are actually quite noticeable.

This also makes the dirent name copy use unsafe_put_user() with a couple
of macros.  We do not want to make function calls with SMAP/PAN
disabled, and the code this generates is quite good when the
architecture uses "asm goto" for unsafe_put_user() like x86 does.

Note that this doesn't bother with the legacy cases.  Nobody should use
them anyway, so performance doesn't really matter there.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/readdir.c | 128 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 93 insertions(+), 35 deletions(-)

diff --git a/fs/readdir.c b/fs/readdir.c
index 2f6a4534e0df..579c8ea894ae 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -20,9 +20,63 @@
 #include <linux/syscalls.h>
 #include <linux/unistd.h>
 #include <linux/compat.h>
-
 #include <linux/uaccess.h>
 
+#include <asm/unaligned.h>
+
+/*
+ * Note the "unsafe_put_user() semantics: we goto a
+ * label for errors.
+ *
+ * Also note how we use a "while()" loop here, even though
+ * only the biggest size needs to loop. The compiler (well,
+ * at least gcc) is smart enough to turn the smaller sizes
+ * into just if-statements, and this way we don't need to
+ * care whether 'u64' or 'u32' is the biggest size.
+ */
+#define unsafe_copy_loop(dst, src, len, type, label) 		\
+	while (len >= sizeof(type)) {				\
+		unsafe_put_user(get_unaligned((type *)src),	\
+			(type __user *)dst, label);		\
+		dst += sizeof(type);				\
+		src += sizeof(type);				\
+		len -= sizeof(type);				\
+	}
+
+/*
+ * We avoid doing 64-bit copies on 32-bit architectures. They
+ * might be better, but the component names are mostly small,
+ * and the 64-bit cases can end up being much more complex and
+ * put much more register pressure on the code, so it's likely
+ * not worth the pain of unaligned accesses etc.
+ *
+ * So limit the copies to "unsigned long" size. I did verify
+ * that at least the x86-32 case is ok without this limiting,
+ * but I worry about random other legacy 32-bit cases that
+ * might not do as well.
+ */
+#define unsafe_copy_type(dst, src, len, type, label) do {	\
+	if (sizeof(type) <= sizeof(unsigned long))		\
+		unsafe_copy_loop(dst, src, len, type, label);	\
+} while (0)
+
+/*
+ * Copy the dirent name to user space, and NUL-terminate
+ * it. This should not be a function call, since we're doing
+ * the copy inside a "user_access_begin/end()" section.
+ */
+#define unsafe_copy_dirent_name(_dst, _src, _len, label) do {	\
+	char __user *dst = (_dst);				\
+	const char *src = (_src);				\
+	size_t len = (_len);					\
+	unsafe_copy_type(dst, src, len, u64, label);	 	\
+	unsafe_copy_type(dst, src, len, u32, label);		\
+	unsafe_copy_type(dst, src, len, u16, label);		\
+	unsafe_copy_type(dst, src, len, u8,  label);		\
+	unsafe_put_user(0, dst, label);				\
+} while (0)
+
+
 int iterate_dir(struct file *file, struct dir_context *ctx)
 {
 	struct inode *inode = file_inode(file);
@@ -182,28 +236,31 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
 		return -EOVERFLOW;
 	}
 	dirent = buf->previous;
-	if (dirent) {
-		if (signal_pending(current))
-			return -EINTR;
-		if (__put_user(offset, &dirent->d_off))
-			goto efault;
-	}
+	if (dirent && signal_pending(current))
+		return -EINTR;
+
+	/*
+	 * Note! This range-checks 'previous' (which may be NULL).
+	 * The real range was checked in getdents
+	 */
+	if (!user_access_begin(dirent, sizeof(*dirent)))
+		goto efault;
+	if (dirent)
+		unsafe_put_user(offset, &dirent->d_off, efault_end);
 	dirent = buf->current_dir;
-	if (__put_user(d_ino, &dirent->d_ino))
-		goto efault;
-	if (__put_user(reclen, &dirent->d_reclen))
-		goto efault;
-	if (copy_to_user(dirent->d_name, name, namlen))
-		goto efault;
-	if (__put_user(0, dirent->d_name + namlen))
-		goto efault;
-	if (__put_user(d_type, (char __user *) dirent + reclen - 1))
-		goto efault;
+	unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
+	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
+	unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
+	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
+	user_access_end();
+
 	buf->previous = dirent;
 	dirent = (void __user *)dirent + reclen;
 	buf->current_dir = dirent;
 	buf->count -= reclen;
 	return 0;
+efault_end:
+	user_access_end();
 efault:
 	buf->error = -EFAULT;
 	return -EFAULT;
@@ -263,30 +320,31 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
 	if (reclen > buf->count)
 		return -EINVAL;
 	dirent = buf->previous;
-	if (dirent) {
-		if (signal_pending(current))
-			return -EINTR;
-		if (__put_user(offset, &dirent->d_off))
-			goto efault;
-	}
+	if (dirent && signal_pending(current))
+		return -EINTR;
+
+	/*
+	 * Note! This range-checks 'previous' (which may be NULL).
+	 * The real range was checked in getdents
+	 */
+	if (!user_access_begin(dirent, sizeof(*dirent)))
+		goto efault;
+	if (dirent)
+		unsafe_put_user(offset, &dirent->d_off, efault_end);
 	dirent = buf->current_dir;
-	if (__put_user(ino, &dirent->d_ino))
-		goto efault;
-	if (__put_user(0, &dirent->d_off))
-		goto efault;
-	if (__put_user(reclen, &dirent->d_reclen))
-		goto efault;
-	if (__put_user(d_type, &dirent->d_type))
-		goto efault;
-	if (copy_to_user(dirent->d_name, name, namlen))
-		goto efault;
-	if (__put_user(0, dirent->d_name + namlen))
-		goto efault;
+	unsafe_put_user(ino, &dirent->d_ino, efault_end);
+	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
+	unsafe_put_user(d_type, &dirent->d_type, efault_end);
+	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
+	user_access_end();
+
 	buf->previous = dirent;
 	dirent = (void __user *)dirent + reclen;
 	buf->current_dir = dirent;
 	buf->count -= reclen;
 	return 0;
+efault_end:
+	user_access_end();
 efault:
 	buf->error = -EFAULT;
 	return -EFAULT;

From 8a23eb804ca4f2be909e372cf5a9e7b30ae476cd Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 5 Oct 2019 11:32:52 -0700
Subject: [PATCH 2/2] Make filldir[64]() verify the directory entry filename is
 valid

This has been discussed several times, and now filesystem people are
talking about doing it individually at the filesystem layer, so head
that off at the pass and just do it in getdents{64}().

This is partially based on a patch by Jann Horn, but checks for NUL
bytes as well, and somewhat simplified.

There's also commentary about how it might be better if invalid names
due to filesystem corruption don't cause an immediate failure, but only
an error at the end of the readdir(), so that people can still see the
filenames that are ok.

There's also been discussion about just how much POSIX strictly speaking
requires this since it's about filesystem corruption.  It's really more
"protect user space from bad behavior" as pointed out by Jann.  But
since Eric Biederman looked up the POSIX wording, here it is for context:

 "From readdir:

   The readdir() function shall return a pointer to a structure
   representing the directory entry at the current position in the
   directory stream specified by the argument dirp, and position the
   directory stream at the next entry. It shall return a null pointer
   upon reaching the end of the directory stream. The structure dirent
   defined in the <dirent.h> header describes a directory entry.

  From definitions:

   3.129 Directory Entry (or Link)

   An object that associates a filename with a file. Several directory
   entries can associate names with the same file.

  ...

   3.169 Filename

   A name consisting of 1 to {NAME_MAX} bytes used to name a file. The
   characters composing the name may be selected from the set of all
   character values excluding the slash character and the null byte. The
   filenames dot and dot-dot have special meaning. A filename is
   sometimes referred to as a 'pathname component'."

Note that I didn't bother adding the checks to any legacy interfaces
that nobody uses.

Also note that if this ends up being noticeable as a performance
regression, we can fix that to do a much more optimized model that
checks for both NUL and '/' at the same time one word at a time.

We haven't really tended to optimize 'memchr()', and it only checks for
one pattern at a time anyway, and we really _should_ check for NUL too
(but see the comment about "soft errors" in the code about why it
currently only checks for '/')

See the CONFIG_DCACHE_WORD_ACCESS case of hash_name() for how the name
lookup code looks for pathname terminating characters in parallel.

Link: https://lore.kernel.org/lkml/20190118161440.220134-2-jannh@google.com/
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Jann Horn <jannh@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/readdir.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/fs/readdir.c b/fs/readdir.c
index 579c8ea894ae..19bea591c3f1 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -118,6 +118,40 @@ out:
 }
 EXPORT_SYMBOL(iterate_dir);
 
+/*
+ * POSIX says that a dirent name cannot contain NULL or a '/'.
+ *
+ * It's not 100% clear what we should really do in this case.
+ * The filesystem is clearly corrupted, but returning a hard
+ * error means that you now don't see any of the other names
+ * either, so that isn't a perfect alternative.
+ *
+ * And if you return an error, what error do you use? Several
+ * filesystems seem to have decided on EUCLEAN being the error
+ * code for EFSCORRUPTED, and that may be the error to use. Or
+ * just EIO, which is perhaps more obvious to users.
+ *
+ * In order to see the other file names in the directory, the
+ * caller might want to make this a "soft" error: skip the
+ * entry, and return the error at the end instead.
+ *
+ * Note that this should likely do a "memchr(name, 0, len)"
+ * check too, since that would be filesystem corruption as
+ * well. However, that case can't actually confuse user space,
+ * which has to do a strlen() on the name anyway to find the
+ * filename length, and the above "soft error" worry means
+ * that it's probably better left alone until we have that
+ * issue clarified.
+ */
+static int verify_dirent_name(const char *name, int len)
+{
+	if (WARN_ON_ONCE(!len))
+		return -EIO;
+	if (WARN_ON_ONCE(memchr(name, '/', len)))
+		return -EIO;
+	return 0;
+}
+
 /*
  * Traditional linux readdir() handling..
  *
@@ -227,6 +261,9 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
 	int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
 		sizeof(long));
 
+	buf->error = verify_dirent_name(name, namlen);
+	if (unlikely(buf->error))
+		return buf->error;
 	buf->error = -EINVAL;	/* only used if we fail.. */
 	if (reclen > buf->count)
 		return -EINVAL;
@@ -316,6 +353,9 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
 	int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
 		sizeof(u64));
 
+	buf->error = verify_dirent_name(name, namlen);
+	if (unlikely(buf->error))
+		return buf->error;
 	buf->error = -EINVAL;	/* only used if we fail.. */
 	if (reclen > buf->count)
 		return -EINVAL;