alistair23-linux/include/linux/fdtable.h
Christian Brauner 60997c3d45
close_range: add CLOSE_RANGE_UNSHARE
One of the use-cases of close_range() is to drop file descriptors just before
execve(). This would usually be expressed in the sequence:

unshare(CLONE_FILES);
close_range(3, ~0U);

As pointed out by Linus, it might be desirable to have this be a part of
close_range() itself under a new flag, CLOSE_RANGE_UNSHARE.

This expands {dup,unshare}_fd() to take a max_fds argument that indicates the
maximum number of file descriptors to copy from the old struct files. When the
user requests that all file descriptors are supposed to be closed via
close_range(min, max), we can cap via unshare_fd(min) and hence don't need
to do any of the heavy fput() work for everything above min.

The patch makes it so that if CLOSE_RANGE_UNSHARE is requested and we do in
fact currently share our file descriptor table we create a new private copy.
We then close all fds in the requested range and finally after we're done we
install the new fd table.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
2020-06-17 00:07:38 +02:00

133 lines
3.6 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/*
* descriptor table internals; you almost certainly want file.h instead.
*/
#ifndef __LINUX_FDTABLE_H
#define __LINUX_FDTABLE_H
#include <linux/posix_types.h>
#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/nospec.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/atomic.h>
/*
 * The default fd array needs to be at least BITS_PER_LONG,
 * as this is the granularity returned by copy_fdset().
 */
#define NR_OPEN_DEFAULT BITS_PER_LONG
/*
 * All-ones unsigned int. The expansion is parenthesized so that it stays a
 * single operand no matter which operators surround the macro at the use site.
 * NOTE(review): presumably used as the "no cap" value for max_fds arguments -
 * confirm against the callers of dup_fd()/unshare_fd().
 */
#define NR_OPEN_MAX (~0U)
/*
 * One descriptor table: the fd -> struct file array together with the
 * per-fd bitmaps that the helpers in this header test.
 */
struct fdtable {
/* upper bound for indexing fd[]; __fcheck_files() rejects any fd >= max_fds */
unsigned int max_fds;
struct file __rcu **fd; /* current fd array */
/* bitmap indexed by fd; tested by close_on_exec() */
unsigned long *close_on_exec;
/* bitmap indexed by fd; tested by fd_is_open() */
unsigned long *open_fds;
/* NOTE(review): presumably a summary bitmap over the words of open_fds - confirm in fs/file.c */
unsigned long *full_fds_bits;
/* NOTE(review): presumably lets the table be freed after an RCU grace period - confirm */
struct rcu_head rcu;
};
/*
 * Report whether bit @fd is set in the close-on-exec bitmap of @fdt.
 * No range check is done here; the caller supplies an fd that is valid
 * for the bitmap.
 */
static inline bool close_on_exec(unsigned int fd, const struct fdtable *fdt)
{
	const unsigned long *cloexec_map = fdt->close_on_exec;

	return test_bit(fd, cloexec_map);
}
/*
 * Report whether bit @fd is set in the open-descriptor bitmap of @fdt.
 * No range check is done here; the caller supplies an fd that is valid
 * for the bitmap.
 */
static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt)
{
	const unsigned long *open_map = fdt->open_fds;

	return test_bit(fd, open_map);
}
/*
* Open file table structure
*
* Holds the RCU-managed pointer to the descriptor table currently in use
* (fdt), an embedded table (fdtab) and small inline bitmaps/array sized
* for NR_OPEN_DEFAULT descriptors.
*/
struct files_struct {
/*
* read mostly part
*/
/* NOTE(review): presumably the reference count behind get_files_struct()/put_files_struct() - confirm */
atomic_t count;
/* NOTE(review): names suggest table-resize coordination; the users are not in this header */
bool resize_in_progress;
wait_queue_head_t resize_wait;
/* table currently in use; read through files_fdtable() or rcu_dereference_raw() */
struct fdtable __rcu *fdt;
/* table embedded in this structure (NOTE(review): presumably what fdt points at initially - confirm) */
struct fdtable fdtab;
/*
* written part on a separate cache line in SMP
*/
/* holding this lock satisfies the lockdep checks in files_fdtable() and fcheck_files() */
spinlock_t file_lock ____cacheline_aligned_in_smp;
/* NOTE(review): presumably a hint for where the next fd search starts - confirm in fs/file.c */
unsigned int next_fd;
/*
* One-word inline bitmaps and an NR_OPEN_DEFAULT-entry inline file array.
* NOTE(review): presumably the initial backing store for fdtab - confirm.
*/
unsigned long close_on_exec_init[1];
unsigned long open_fds_init[1];
unsigned long full_fds_bits_init[1];
struct file __rcu * fd_array[NR_OPEN_DEFAULT];
};
/* Forward declarations only; these types are not defined in this header. */
struct dentry;
struct file_operations;
struct vfsmount;
/*
 * Fetch an RCU-protected pointer (@fdtfd) that belongs to @files.
 * The condition handed to rcu_dereference_check() is that files->file_lock
 * is held.
 */
#define rcu_dereference_check_fdtable(files, fdtfd) \
rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock))
/*
 * Current descriptor table of @files, i.e. files->fdt fetched through
 * rcu_dereference_check_fdtable().
 */
#define files_fdtable(files) \
rcu_dereference_check_fdtable((files), (files)->fdt)
/*
 * Look up @fd in the current table of @files without any lockdep checking.
 *
 * The caller must ensure that fd table isn't shared or hold rcu or file lock.
 *
 * Returns the struct file pointer stored in slot @fd, or NULL when @fd is
 * not below the table's max_fds.
 */
static inline struct file *__fcheck_files(struct files_struct *files, unsigned int fd)
{
	struct fdtable *table = rcu_dereference_raw(files->fdt);
	unsigned int slot;

	if (fd >= table->max_fds)
		return NULL;

	/* sanitize the index with array_index_nospec() before using it */
	slot = array_index_nospec(fd, table->max_fds);
	return rcu_dereference_raw(table->fd[slot]);
}
/*
 * Checked variant of __fcheck_files(): raises an RCU lockdep warning unless
 * the caller is inside an RCU read-side section or holds files->file_lock,
 * then performs the same lookup.
 */
static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd)
{
	RCU_LOCKDEP_WARN(!(rcu_read_lock_held() ||
			   lockdep_is_held(&files->file_lock)),
			 "suspicious rcu_dereference_check() usage");

	return __fcheck_files(files, fd);
}
/*
* Check whether the specified fd has an open file.
*
* Shorthand for fcheck_files() on current->files, so the same rule applies:
* the caller holds rcu_read_lock() or current->files->file_lock. Evaluates
* to the struct file pointer for @fd, or NULL.
*/
#define fcheck(fd) fcheck_files(current->files, fd)
struct task_struct;

/*
 * files_struct helpers; the definitions are not in this header.
 * Parameter names are given for readability and consistency with the
 * other prototypes in this file.
 */
struct files_struct *get_files_struct(struct task_struct *task);
void put_files_struct(struct files_struct *fs);
void reset_files_struct(struct files_struct *files);
int unshare_files(struct files_struct **displaced);

/*
 * @max_fds: maximum number of file descriptors to copy from @oldf.
 * NOTE(review): @errorp presumably receives an error code when NULL is
 * returned - confirm against the definition.
 */
struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds,
			    int *errorp) __latent_entropy;
void do_close_on_exec(struct files_struct *files);

/*
 * NOTE(review): presumably invokes @f with @p for open descriptors of @files
 * starting at @n - confirm against the definition.
 */
int iterate_fd(struct files_struct *files, unsigned int n,
	       int (*f)(const void *, struct file *, unsigned int),
	       const void *p);
/*
 * Descriptor-level helpers; the definitions are not in this header.
 * Declared without the redundant "extern" and with "unsigned int" spelled
 * out, matching the other function prototypes in this file.
 */
int __alloc_fd(struct files_struct *files,
	       unsigned int start, unsigned int end, unsigned int flags);
void __fd_install(struct files_struct *files,
		  unsigned int fd, struct file *file);
int __close_fd(struct files_struct *files, unsigned int fd);
/* NOTE(review): @flags presumably carries CLOSE_RANGE_* bits such as CLOSE_RANGE_UNSHARE - confirm */
int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
int __close_fd_get_file(unsigned int fd, struct file **res);
/* @max_fds: maximum number of file descriptors to copy from the old struct files */
int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
	       struct files_struct **new_fdp);
/* NOTE(review): presumably the slab cache for struct files_struct; defined outside this header - confirm */
extern struct kmem_cache *files_cachep;
#endif /* __LINUX_FDTABLE_H */