1
0
Fork 0

io_uring-5.12-2021-03-27

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmBf1KAQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpjVSD/0f1HdekXnIE6aSRQ7YEV8ux2t5wUeDyP8U
 cdcZ8fBW9PvKZLdODSI4sw8UYV5OYEBcfImFe3nRVHR+RIVQo72UTYvuHqeUYNct
 w3drgF2GEMIxJFZR6zf9LDrQVduPqXvbEJLui6TN+eX/5E99ZlUWMLwkX1k+vDju
 QfaGZjz2736GTn1MPc7jdyZKoK7eCi5xtNFPash5wGck7aYl5TGXnG/8bRYsv2Tw
 eCYKbvv4x0s8OFcYVQMooDfbIMCyyfTwt6YatFHQEtM/RM+M66gndvv3jfkeJQju
 hz0I8qOJ8X5lf0VucncWs5J8b9Whr5YZV+k9461xalBbV9ed2vzIIikP8DpCxtYz
 yKbsdDm0+3hwfuZOz+d7ooEXKsphJ1PnSsEeuNZXtKDXVtphksUbbq4H2NLINcsQ
 m6dwaRPSEA0EymngGY2e+8+CU0euiE4mqoMpw4D9m9Irs+BAaWYGk9xCWr0BGem0
 auZOMqvV2xktdBlGx1BJCLts1sHHxy8IM3u0852R/1AfcKOkXwNVPt62I8e9ceIA
 wc731aWHwJfS25m430xFDPJKJpUZoZgste4qwVym70CmRziuamgYyIfrfRg1ZjsD
 ZBa9Z4hPiT4e0eDqlYjcMpl9FORgYQXVXy5ofd/eZg5xkU8X+i6TVZkaQNkZyqV/
 4ogBZYUolg==
 =mwLC
 -----END PGP SIGNATURE-----

Merge tag 'io_uring-5.12-2021-03-27' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

 - Use thread info versions of flag testing, as discussed last week.

 - The series enabling PF_IO_WORKER to just take signals, instead of
   needing to special case that they do not in a bunch of places. Ends
   up being pretty trivial to do, and then we can revert all the special
   casing we're currently doing.

 - Kill dead pointer assignment

 - Fix hashed part of async work queue trace

 - Fix sign extension issue for IORING_OP_PROVIDE_BUFFERS

 - Fix a link completion ordering regression in this merge window

 - Cancellation fixes

* tag 'io_uring-5.12-2021-03-27' of git://git.kernel.dk/linux-block:
  io_uring: remove unsued assignment to pointer io
  io_uring: don't cancel extra on files match
  io_uring: don't cancel-track common timeouts
  io_uring: do post-completion chore on t-out cancel
  io_uring: fix timeout cancel return code
  Revert "signal: don't allow STOP on PF_IO_WORKER threads"
  Revert "kernel: freezer should treat PF_IO_WORKER like PF_KTHREAD for freezing"
  Revert "kernel: treat PF_IO_WORKER like PF_KTHREAD for ptrace/signals"
  Revert "signal: don't allow sending any signals to PF_IO_WORKER threads"
  kernel: stop masking signals in create_io_thread()
  io_uring: handle signals for IO threads like a normal thread
  kernel: don't call do_exit() for PF_IO_WORKER threads
  io_uring: maintain CQE order of a failed link
  io-wq: fix race around pending work on teardown
  io_uring: do ctx sqd ejection in a clear context
  io_uring: fix provide_buffers sign extension
  io_uring: don't skip file_end_write() on reissue
  io_uring: correct io_queue_async_work() traces
  io_uring: don't use {test,clear}_tsk_thread_flag() for current
rM2-mainline
Linus Torvalds 2021-03-28 11:42:05 -07:00
commit b44d1ddcf8
6 changed files with 94 additions and 76 deletions

View File

@ -16,7 +16,6 @@
#include <linux/rculist_nulls.h>
#include <linux/cpu.h>
#include <linux/tracehook.h>
#include <linux/freezer.h>
#include "../kernel/sched/sched.h"
#include "io-wq.h"
@ -388,11 +387,9 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
static bool io_flush_signals(void)
{
if (unlikely(test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))) {
if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) {
__set_current_state(TASK_RUNNING);
if (current->task_works)
task_work_run();
clear_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL);
tracehook_notify_signal();
return true;
}
return false;
@ -505,10 +502,15 @@ loop:
if (io_flush_signals())
continue;
ret = schedule_timeout(WORKER_IDLE_TIMEOUT);
if (try_to_freeze() || ret)
continue;
if (fatal_signal_pending(current))
if (signal_pending(current)) {
struct ksignal ksig;
if (!get_signal(&ksig))
continue;
break;
}
if (ret)
continue;
/* timed out, exit unless we're the fixed worker */
if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
!(worker->flags & IO_WORKER_F_FIXED))
@ -716,9 +718,13 @@ static int io_wq_manager(void *data)
set_current_state(TASK_INTERRUPTIBLE);
io_wq_check_workers(wq);
schedule_timeout(HZ);
try_to_freeze();
if (fatal_signal_pending(current))
if (signal_pending(current)) {
struct ksignal ksig;
if (!get_signal(&ksig))
continue;
set_bit(IO_WQ_BIT_EXIT, &wq->state);
}
} while (!test_bit(IO_WQ_BIT_EXIT, &wq->state));
io_wq_check_workers(wq);
@ -1065,7 +1071,11 @@ static void io_wq_destroy(struct io_wq *wq)
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
WARN_ON_ONCE(!wq_list_empty(&wqe->work_list));
struct io_cb_cancel_data match = {
.fn = io_wq_work_match_all,
.cancel_all = true,
};
io_wqe_cancel_pending_work(wqe, &match);
kfree(wqe);
}
io_wq_put_hash(wq->hash);

View File

@ -78,7 +78,6 @@
#include <linux/task_work.h>
#include <linux/pagemap.h>
#include <linux/io_uring.h>
#include <linux/freezer.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@ -1095,8 +1094,6 @@ static bool io_match_task(struct io_kiocb *head,
io_for_each_link(req, head) {
if (req->flags & REQ_F_INFLIGHT)
return true;
if (req->task->files == files)
return true;
}
return false;
}
@ -1239,16 +1236,16 @@ static void io_queue_async_work(struct io_kiocb *req)
BUG_ON(!tctx);
BUG_ON(!tctx->io_wq);
trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
&req->work, req->flags);
/* init ->work of the whole link before punting */
io_prep_async_link(req);
trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
&req->work, req->flags);
io_wq_enqueue(tctx->io_wq, &req->work);
if (link)
io_queue_linked_timeout(link);
}
static void io_kill_timeout(struct io_kiocb *req)
static void io_kill_timeout(struct io_kiocb *req, int status)
{
struct io_timeout_data *io = req->async_data;
int ret;
@ -1258,31 +1255,11 @@ static void io_kill_timeout(struct io_kiocb *req)
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list);
io_cqring_fill_event(req, 0);
io_cqring_fill_event(req, status);
io_put_req_deferred(req, 1);
}
}
/*
* Returns true if we found and killed one or more timeouts
*/
static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
struct files_struct *files)
{
struct io_kiocb *req, *tmp;
int canceled = 0;
spin_lock_irq(&ctx->completion_lock);
list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
if (io_match_task(req, tsk, files)) {
io_kill_timeout(req);
canceled++;
}
}
spin_unlock_irq(&ctx->completion_lock);
return canceled != 0;
}
static void __io_queue_deferred(struct io_ring_ctx *ctx)
{
do {
@ -1327,7 +1304,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
break;
list_del_init(&req->timeout.list);
io_kill_timeout(req);
io_kill_timeout(req, 0);
} while (!list_empty(&ctx->timeout_list));
ctx->cq_last_tm_flush = seq;
@ -2524,13 +2501,12 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
{
int cflags = 0;
if (req->rw.kiocb.ki_flags & IOCB_WRITE)
kiocb_end_write(req);
if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_reissue(req))
return;
if (res != req->result)
req_set_fail_links(req);
if (req->rw.kiocb.ki_flags & IOCB_WRITE)
kiocb_end_write(req);
if (req->flags & REQ_F_BUFFER_SELECTED)
cflags = io_put_rw_kbuf(req);
__io_req_complete(req, issue_flags, res, cflags);
@ -3978,6 +3954,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
static int io_provide_buffers_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
unsigned long size;
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
@ -3991,7 +3968,8 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
p->addr = READ_ONCE(sqe->addr);
p->len = READ_ONCE(sqe->len);
if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs)))
size = (unsigned long)p->len * p->nbufs;
if (!access_ok(u64_to_user_ptr(p->addr), size))
return -EFAULT;
p->bgid = READ_ONCE(sqe->buf_group);
@ -4820,7 +4798,6 @@ static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
ret = -ENOMEM;
goto out;
}
io = req->async_data;
memcpy(req->async_data, &__io, sizeof(__io));
return -EAGAIN;
}
@ -5583,7 +5560,8 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
data->mode = io_translate_timeout_mode(flags);
hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
io_req_track_inflight(req);
if (is_timeout_link)
io_req_track_inflight(req);
return 0;
}
@ -6479,8 +6457,6 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
ret = io_init_req(ctx, req, sqe);
if (unlikely(ret)) {
fail_req:
io_put_req(req);
io_req_complete(req, ret);
if (link->head) {
/* fail even hard links since we don't submit */
link->head->flags |= REQ_F_FAIL_LINK;
@ -6488,6 +6464,8 @@ fail_req:
io_req_complete(link->head, -ECANCELED);
link->head = NULL;
}
io_put_req(req);
io_req_complete(req, ret);
return ret;
}
ret = io_req_prep(req, sqe);
@ -6764,8 +6742,13 @@ static int io_sq_thread(void *data)
timeout = jiffies + sqd->sq_thread_idle;
continue;
}
if (fatal_signal_pending(current))
if (signal_pending(current)) {
struct ksignal ksig;
if (!get_signal(&ksig))
continue;
break;
}
sqt_spin = false;
cap_entries = !list_is_singular(&sqd->ctx_list);
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
@ -6808,7 +6791,6 @@ static int io_sq_thread(void *data)
mutex_unlock(&sqd->lock);
schedule();
try_to_freeze();
mutex_lock(&sqd->lock);
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
io_ring_clear_wakeup_flag(ctx);
@ -6873,7 +6855,7 @@ static int io_run_task_work_sig(void)
return 1;
if (!signal_pending(current))
return 0;
if (test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))
if (test_thread_flag(TIF_NOTIFY_SIGNAL))
return -ERESTARTSYS;
return -EINTR;
}
@ -8563,6 +8545,14 @@ static void io_ring_exit_work(struct work_struct *work)
struct io_tctx_node *node;
int ret;
/* prevent SQPOLL from submitting new requests */
if (ctx->sq_data) {
io_sq_thread_park(ctx->sq_data);
list_del_init(&ctx->sqd_list);
io_sqd_update_thread_idle(ctx->sq_data);
io_sq_thread_unpark(ctx->sq_data);
}
/*
* If we're doing polled IO and end up having requests being
* submitted async (out-of-line), then completions can come in while
@ -8599,6 +8589,28 @@ static void io_ring_exit_work(struct work_struct *work)
io_ring_ctx_free(ctx);
}
/* Returns true if we found and killed one or more timeouts */
static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
struct files_struct *files)
{
struct io_kiocb *req, *tmp;
int canceled = 0;
spin_lock_irq(&ctx->completion_lock);
list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
if (io_match_task(req, tsk, files)) {
io_kill_timeout(req, -ECANCELED);
canceled++;
}
}
io_commit_cqring(ctx);
spin_unlock_irq(&ctx->completion_lock);
if (canceled != 0)
io_cqring_ev_posted(ctx);
return canceled != 0;
}
static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
{
unsigned long index;
@ -8614,14 +8626,6 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
io_unregister_personality(ctx, index);
mutex_unlock(&ctx->uring_lock);
/* prevent SQPOLL from submitting new requests */
if (ctx->sq_data) {
io_sq_thread_park(ctx->sq_data);
list_del_init(&ctx->sqd_list);
io_sqd_update_thread_idle(ctx->sq_data);
io_sq_thread_unpark(ctx->sq_data);
}
io_kill_timeouts(ctx, NULL, NULL);
io_poll_remove_all(ctx, NULL, NULL);

View File

@ -1948,8 +1948,14 @@ static __latent_entropy struct task_struct *copy_process(
p = dup_task_struct(current, node);
if (!p)
goto fork_out;
if (args->io_thread)
if (args->io_thread) {
/*
* Mark us an IO worker, and block any signal that isn't
* fatal or STOP
*/
p->flags |= PF_IO_WORKER;
siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
}
/*
* This _must_ happen before we call free_task(), i.e. before we jump
@ -2438,14 +2444,8 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
.stack_size = (unsigned long)arg,
.io_thread = 1,
};
struct task_struct *tsk;
tsk = copy_process(NULL, 0, node, &args);
if (!IS_ERR(tsk)) {
sigfillset(&tsk->blocked);
sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
}
return tsk;
return copy_process(NULL, 0, node, &args);
}
/*

View File

@ -134,7 +134,7 @@ bool freeze_task(struct task_struct *p)
return false;
}
if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))
if (!(p->flags & PF_KTHREAD))
fake_signal_wake_up(p);
else
wake_up_state(p, TASK_INTERRUPTIBLE);

View File

@ -375,7 +375,7 @@ static int ptrace_attach(struct task_struct *task, long request,
audit_ptrace(task);
retval = -EPERM;
if (unlikely(task->flags & (PF_KTHREAD | PF_IO_WORKER)))
if (unlikely(task->flags & PF_KTHREAD))
goto out;
if (same_thread_group(task, current))
goto out;

View File

@ -91,7 +91,7 @@ static bool sig_task_ignored(struct task_struct *t, int sig, bool force)
return true;
/* Only allow kernel generated signals to this kthread */
if (unlikely((t->flags & (PF_KTHREAD | PF_IO_WORKER)) &&
if (unlikely((t->flags & PF_KTHREAD) &&
(handler == SIG_KTHREAD_KERNEL) && !force))
return true;
@ -288,8 +288,7 @@ bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask)
JOBCTL_STOP_SIGMASK | JOBCTL_TRAPPING));
BUG_ON((mask & JOBCTL_TRAPPING) && !(mask & JOBCTL_PENDING_MASK));
if (unlikely(fatal_signal_pending(task) ||
(task->flags & (PF_EXITING | PF_IO_WORKER))))
if (unlikely(fatal_signal_pending(task) || (task->flags & PF_EXITING)))
return false;
if (mask & JOBCTL_STOP_SIGMASK)
@ -834,9 +833,6 @@ static int check_kill_permission(int sig, struct kernel_siginfo *info,
if (!valid_signal(sig))
return -EINVAL;
/* PF_IO_WORKER threads don't take any signals */
if (t->flags & PF_IO_WORKER)
return -ESRCH;
if (!si_fromuser(info))
return 0;
@ -1100,7 +1096,7 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
/*
* Skip useless siginfo allocation for SIGKILL and kernel threads.
*/
if ((sig == SIGKILL) || (t->flags & (PF_KTHREAD | PF_IO_WORKER)))
if ((sig == SIGKILL) || (t->flags & PF_KTHREAD))
goto out_set;
/*
@ -2771,6 +2767,14 @@ relock:
do_coredump(&ksig->info);
}
/*
* PF_IO_WORKER threads will catch and exit on fatal signals
* themselves. They have cleanup that must be performed, so
* we cannot call do_exit() on their behalf.
*/
if (current->flags & PF_IO_WORKER)
goto out;
/*
* Death signals, no core dump.
*/
@ -2778,7 +2782,7 @@ relock:
/* NOTREACHED */
}
spin_unlock_irq(&sighand->siglock);
out:
ksig->sig = signr;
if (!(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS))