alistair23-linux/include/linux/fdtable.h
Linus Torvalds f3f86e33dc vfs: Fix pathological performance case for __alloc_fd()
Al Viro points out that:
> >     * [Linux-specific aside] our __alloc_fd() can degrade quite badly
> > with some use patterns.  The cacheline pingpong in the bitmap is probably
> > inevitable, unless we accept considerably heavier memory footprint,
> > but we also have a case when alloc_fd() takes O(n) and it's _not_ hard
> > to trigger - close(3);open(...); will have the next open() after that
> > scanning the entire in-use bitmap.

And Eric Dumazet has a somewhat realistic multithreaded microbenchmark
that opens and closes a lot of sockets with minimal work per socket.

This patch largely fixes it.  We keep a 2nd-level bitmap of the open
file bitmaps, showing which words are already full.  So then we can
traverse that second-level bitmap to efficiently skip already allocated
file descriptors.

On his benchmark, this improves performance by up to an order of
magnitude, by avoiding the excessive open file bitmap scanning.

Tested-and-acked-by: Eric Dumazet <edumazet@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-10-31 16:12:10 -07:00

124 lines
3.1 KiB
C

/*
* descriptor table internals; you almost certainly want file.h instead.
*/
#ifndef __LINUX_FDTABLE_H
#define __LINUX_FDTABLE_H
#include <linux/posix_types.h>
#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/atomic.h>
/*
* The default fd array needs to be at least BITS_PER_LONG,
* as this is the granularity returned by copy_fdset().
*/
#define NR_OPEN_DEFAULT BITS_PER_LONG
struct fdtable {
unsigned int max_fds;
struct file __rcu **fd; /* current fd array */
unsigned long *close_on_exec;
unsigned long *open_fds;
unsigned long *full_fds_bits;
struct rcu_head rcu;
};
static inline bool close_on_exec(int fd, const struct fdtable *fdt)
{
return test_bit(fd, fdt->close_on_exec);
}
static inline bool fd_is_open(int fd, const struct fdtable *fdt)
{
return test_bit(fd, fdt->open_fds);
}
/*
* Open file table structure
*/
struct files_struct {
/*
* read mostly part
*/
atomic_t count;
bool resize_in_progress;
wait_queue_head_t resize_wait;
struct fdtable __rcu *fdt;
struct fdtable fdtab;
/*
* written part on a separate cache line in SMP
*/
spinlock_t file_lock ____cacheline_aligned_in_smp;
int next_fd;
unsigned long close_on_exec_init[1];
unsigned long open_fds_init[1];
unsigned long full_fds_bits_init[1];
struct file __rcu * fd_array[NR_OPEN_DEFAULT];
};
struct file_operations;
struct vfsmount;
struct dentry;
#define rcu_dereference_check_fdtable(files, fdtfd) \
rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock))
#define files_fdtable(files) \
rcu_dereference_check_fdtable((files), (files)->fdt)
/*
* The caller must ensure that fd table isn't shared or hold rcu or file lock
*/
static inline struct file *__fcheck_files(struct files_struct *files, unsigned int fd)
{
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
if (fd < fdt->max_fds)
return rcu_dereference_raw(fdt->fd[fd]);
return NULL;
}
static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd)
{
RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
!lockdep_is_held(&files->file_lock),
"suspicious rcu_dereference_check() usage");
return __fcheck_files(files, fd);
}
/*
* Check whether the specified fd has an open file.
*/
#define fcheck(fd) fcheck_files(current->files, fd)
struct task_struct;
struct files_struct *get_files_struct(struct task_struct *);
void put_files_struct(struct files_struct *fs);
void reset_files_struct(struct files_struct *);
int unshare_files(struct files_struct **);
struct files_struct *dup_fd(struct files_struct *, int *);
void do_close_on_exec(struct files_struct *);
int iterate_fd(struct files_struct *, unsigned,
int (*)(const void *, struct file *, unsigned),
const void *);
extern int __alloc_fd(struct files_struct *files,
unsigned start, unsigned end, unsigned flags);
extern void __fd_install(struct files_struct *files,
unsigned int fd, struct file *file);
extern int __close_fd(struct files_struct *files,
unsigned int fd);
extern struct kmem_cache *files_cachep;
#endif /* __LINUX_FDTABLE_H */