1
0
Fork 0

io_uring-5.10-2020-10-20

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl+O9WEQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpqajEADSD5PiO94YWTtVNWFUjn5RW+GlCE70/0VV
 XImdasmvM8nb48E+z2EW0Ky4vKXeVy5r+WZAeIYqPUHy/ogQDVpEn00NL7tFQmOz
 8UYrlZ3LLE/bSeWM5iavgG7TldVs/ZfspJ0hj3/Ac7jJpzuRGEI5TClsxJ0mWV39
 b2qT4OYBDhwvwVPZ/qhWgEwJXJFzFywckouIbMw8gPkveebUYeyu/yuScNGwYuiQ
 46YPEk/XIuOy8iUvQjqhLY+NNlAKJwt3z9WZgt5F+TZIhkpp7z6h20+jezFQcuFP
 GXzIDN+EADpsbw7MWJYIZVffxDEMlDpkJlAVMT1hsYLDfTXoEzmFwRddoFh2Fjf6
 ghWqhOKffUuAOX2xs1MrS2xLaxd0ot7QqZJVTYk7zEljkaRANlstSZZ+PpI+Sad/
 rNieQvs6jnsmTODDEaV3qyFX5aBQ2NdvyndZNU9wz0GZAWAdz+wxE0A1FVD0A37i
 p6m8sIvhNg3/cW89G04IDYUkAygi8knVDnEDHRwaJtswZQ4pRSGMp+N4qZ0GpnK7
 BviaAhofGaYlqruavO6Ug2YyomYpWGlUxTaB9ZKh0HkEDlDM945+0sgQRdxfsE8d
 OboycqJn3puOl/wh5Fc4oGYrWLsDbaA/5kksC4lm85Z+HUf+UXMS4QFdoPJYjhuM
 H6oMz1w2bQ==
 =v56S
 -----END PGP SIGNATURE-----

Merge tag 'io_uring-5.10-2020-10-20' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:
 "A mix of fixes and a few stragglers. In detail:

   - Revert the bogus __read_mostly that we discussed for the initial
     pull request.

   - Fix a merge window regression with fixed file registration error
     path handling.

   - Fix io-wq numa node affinities.

   - Series abstracting out an io_identity struct, making it both easier
     to see what the personality items are, and also easier to to adopt
     more. Use this to cover audit logging.

   - Fix for read-ahead disabled block condition in async buffered
     reads, and using single page read-ahead to unify what
     generic_file_buffer_read() path is used.

   - Series for REQ_F_COMP_LOCKED fix and removal of it (Pavel)

   - Poll fix (Pavel)"

* tag 'io_uring-5.10-2020-10-20' of git://git.kernel.dk/linux-block: (21 commits)
  io_uring: use blk_queue_nowait() to check if NOWAIT supported
  mm: use limited read-ahead to satisfy read
  mm: mark async iocb read as NOWAIT once some data has been copied
  io_uring: fix double poll mask init
  io-wq: inherit audit loginuid and sessionid
  io_uring: use percpu counters to track inflight requests
  io_uring: assign new io_identity for task if members have changed
  io_uring: store io_identity in io_uring_task
  io_uring: COW io_identity on mismatch
  io_uring: move io identity items into separate struct
  io_uring: rely solely on work flags to determine personality.
  io_uring: pass required context in as flags
  io-wq: assign NUMA node locality if appropriate
  io_uring: fix error path cleanup in io_sqe_files_register()
  Revert "io_uring: mark io_uring_fops/io_op_defs as __read_mostly"
  io_uring: fix REQ_F_COMP_LOCKED by killing it
  io_uring: dig out COMP_LOCK from deep call chain
  io_uring: don't put a poll req under spinlock
  io_uring: don't unnecessarily clear F_LINK_TIMEOUT
  io_uring: don't set COMP_LOCKED if won't put
  ...
zero-sugar-mainline-defconfig
Linus Torvalds 2020-10-20 13:19:30 -07:00
commit 4962a85696
7 changed files with 486 additions and 312 deletions

View File

@ -18,6 +18,7 @@
#include <linux/fs_struct.h>
#include <linux/task_work.h>
#include <linux/blk-cgroup.h>
#include <linux/audit.h>
#include "io-wq.h"
@ -429,14 +430,10 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work)
mmput(worker->mm);
worker->mm = NULL;
}
if (!work->mm)
return;
if (mmget_not_zero(work->mm)) {
kthread_use_mm(work->mm);
worker->mm = work->mm;
/* hang on to this mm */
work->mm = NULL;
if (mmget_not_zero(work->identity->mm)) {
kthread_use_mm(work->identity->mm);
worker->mm = work->identity->mm;
return;
}
@ -448,9 +445,11 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker,
struct io_wq_work *work)
{
#ifdef CONFIG_BLK_CGROUP
if (work->blkcg_css != worker->blkcg_css) {
kthread_associate_blkcg(work->blkcg_css);
worker->blkcg_css = work->blkcg_css;
if (!(work->flags & IO_WQ_WORK_BLKCG))
return;
if (work->identity->blkcg_css != worker->blkcg_css) {
kthread_associate_blkcg(work->identity->blkcg_css);
worker->blkcg_css = work->identity->blkcg_css;
}
#endif
}
@ -458,9 +457,9 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker,
static void io_wq_switch_creds(struct io_worker *worker,
struct io_wq_work *work)
{
const struct cred *old_creds = override_creds(work->creds);
const struct cred *old_creds = override_creds(work->identity->creds);
worker->cur_creds = work->creds;
worker->cur_creds = work->identity->creds;
if (worker->saved_creds)
put_cred(old_creds); /* creds set by previous switch */
else
@ -470,20 +469,26 @@ static void io_wq_switch_creds(struct io_worker *worker,
static void io_impersonate_work(struct io_worker *worker,
struct io_wq_work *work)
{
if (work->files && current->files != work->files) {
if ((work->flags & IO_WQ_WORK_FILES) &&
current->files != work->identity->files) {
task_lock(current);
current->files = work->files;
current->nsproxy = work->nsproxy;
current->files = work->identity->files;
current->nsproxy = work->identity->nsproxy;
task_unlock(current);
}
if (work->fs && current->fs != work->fs)
current->fs = work->fs;
if (work->mm != worker->mm)
if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs)
current->fs = work->identity->fs;
if ((work->flags & IO_WQ_WORK_MM) && work->identity->mm != worker->mm)
io_wq_switch_mm(worker, work);
if (worker->cur_creds != work->creds)
if ((work->flags & IO_WQ_WORK_CREDS) &&
worker->cur_creds != work->identity->creds)
io_wq_switch_creds(worker, work);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->fsize;
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
io_wq_switch_blkcg(worker, work);
#ifdef CONFIG_AUDIT
current->loginuid = work->identity->loginuid;
current->sessionid = work->identity->sessionid;
#endif
}
static void io_assign_current_work(struct io_worker *worker,
@ -496,6 +501,11 @@ static void io_assign_current_work(struct io_worker *worker,
cond_resched();
}
#ifdef CONFIG_AUDIT
current->loginuid = KUIDT_INIT(AUDIT_UID_UNSET);
current->sessionid = AUDIT_SID_UNSET;
#endif
spin_lock_irq(&worker->lock);
worker->cur_work = work;
spin_unlock_irq(&worker->lock);
@ -676,6 +686,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
kfree(worker);
return false;
}
kthread_bind_mask(worker->task, cpumask_of_node(wqe->node));
raw_spin_lock_irq(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);

View File

@ -1,6 +1,8 @@
#ifndef INTERNAL_IO_WQ_H
#define INTERNAL_IO_WQ_H
#include <linux/io_uring.h>
struct io_wq;
enum {
@ -10,6 +12,12 @@ enum {
IO_WQ_WORK_NO_CANCEL = 8,
IO_WQ_WORK_CONCURRENT = 16,
IO_WQ_WORK_FILES = 32,
IO_WQ_WORK_FS = 64,
IO_WQ_WORK_MM = 128,
IO_WQ_WORK_CREDS = 256,
IO_WQ_WORK_BLKCG = 512,
IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
};
@ -85,15 +93,7 @@ static inline void wq_list_del(struct io_wq_work_list *list,
struct io_wq_work {
struct io_wq_work_node list;
struct files_struct *files;
struct mm_struct *mm;
#ifdef CONFIG_BLK_CGROUP
struct cgroup_subsys_state *blkcg_css;
#endif
const struct cred *creds;
struct nsproxy *nsproxy;
struct fs_struct *fs;
unsigned long fsize;
struct io_identity *identity;
unsigned flags;
};

File diff suppressed because it is too large Load Diff

View File

@ -1268,6 +1268,10 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
kuid_t kloginuid;
int rv;
/* Don't let kthreads write their own loginuid */
if (current->flags & PF_KTHREAD)
return -EPERM;
rcu_read_lock();
if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
rcu_read_unlock();

View File

@ -4,18 +4,33 @@
#include <linux/sched.h>
#include <linux/xarray.h>
#include <linux/percpu-refcount.h>
struct io_identity {
struct files_struct *files;
struct mm_struct *mm;
#ifdef CONFIG_BLK_CGROUP
struct cgroup_subsys_state *blkcg_css;
#endif
const struct cred *creds;
struct nsproxy *nsproxy;
struct fs_struct *fs;
unsigned long fsize;
#ifdef CONFIG_AUDIT
kuid_t loginuid;
unsigned int sessionid;
#endif
refcount_t count;
};
struct io_uring_task {
/* submission side */
struct xarray xa;
struct wait_queue_head wait;
struct file *last;
atomic_long_t req_issue;
/* completion side */
bool in_idle ____cacheline_aligned_in_smp;
atomic_long_t req_complete;
struct percpu_counter inflight;
struct io_identity __identity;
struct io_identity *identity;
bool in_idle;
};
#if defined(CONFIG_IO_URING)

View File

@ -2199,6 +2199,14 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
offset = *ppos & ~PAGE_MASK;
/*
* If we've already successfully copied some data, then we
* can no longer safely return -EIOCBQUEUED. Hence mark
* an async read NOWAIT at that point.
*/
if (written && (iocb->ki_flags & IOCB_WAITQ))
iocb->ki_flags |= IOCB_NOWAIT;
for (;;) {
struct page *page;
pgoff_t end_index;

View File

@ -552,15 +552,23 @@ readit:
void page_cache_sync_ra(struct readahead_control *ractl,
struct file_ra_state *ra, unsigned long req_count)
{
/* no read-ahead */
if (!ra->ra_pages)
return;
bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);
if (blk_cgroup_congested())
return;
/*
* Even if read-ahead is disabled, issue this request as read-ahead
* as we'll need it to satisfy the requested range. The forced
* read-ahead will do the right thing and limit the read to just the
* requested range, which we'll set to 1 page for this case.
*/
if (!ra->ra_pages || blk_cgroup_congested()) {
if (!ractl->file)
return;
req_count = 1;
do_forced_ra = true;
}
/* be dumb */
if (ractl->file && (ractl->file->f_mode & FMODE_RANDOM)) {
if (do_forced_ra) {
force_page_cache_ra(ractl, ra, req_count);
return;
}