IB/rdmavt: Add post send to rdmavt

Add in a post_send and post_one_send to rdmavt. The ULP will provide a WQE
to rdmavt which will then walk and queue each element. Rdmavt will then
queue the work to be done in the driver or kick the driver's progress
routine.

There needs to be a follow on patch which adds in another lock for the
head of the queue so that it can be added to and read from in parallel.
This will touch protocol handlers and require other changes in the
drivers. This will be done separately.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
Dennis Dalessandro 2016-01-22 13:00:22 -08:00 committed by Doug Ledford
parent 6f6387ae75
commit bfbac097b6
3 changed files with 206 additions and 14 deletions

View file

@ -53,6 +53,27 @@
#include "qp.h"
#include "vt.h"
/*
* Note that it is OK to post send work requests in the SQE and ERR
* states; rvt_do_send() will process them and generate error
* completions as per IB 1.2 C10-96.
*/
const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = 0,
[IB_QPS_INIT] = RVT_POST_RECV_OK,
[IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK,
[IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK |
RVT_PROCESS_NEXT_SEND_OK,
[IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK,
[IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
RVT_POST_SEND_OK | RVT_FLUSH_SEND,
[IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV |
RVT_POST_SEND_OK | RVT_FLUSH_SEND,
};
EXPORT_SYMBOL(ib_rvt_state_ops);
static void get_map_page(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map,
gfp_t gfp)
@ -586,7 +607,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
/*
* Return the address of the RWQ as the offset to mmap.
* See hfi1_mmap() for details.
* See rvt_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
if (!qp->r_rq.wq) {
@ -749,6 +770,118 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
return -EOPNOTSUPP;
}
/**
* rvt_post_one_wr - post one RC, UC, or UD send work request
* @qp: the QP to post on
* @wr: the work request to send
*/
static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr)
{
struct rvt_swqe *wqe;
u32 next;
int i;
int j;
int acc;
struct rvt_lkey_table *rkt;
struct rvt_pd *pd;
struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
/* IB spec says that num_sge == 0 is OK. */
if (unlikely(wr->num_sge > qp->s_max_sge))
return -EINVAL;
/*
* Don't allow RDMA reads or atomic operations on UC or
* undefined operations.
* Make sure buffer is large enough to hold the result for atomics.
*/
if (qp->ibqp.qp_type == IB_QPT_UC) {
if ((unsigned)wr->opcode >= IB_WR_RDMA_READ)
return -EINVAL;
} else if (qp->ibqp.qp_type != IB_QPT_RC) {
/* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
if (wr->opcode != IB_WR_SEND &&
wr->opcode != IB_WR_SEND_WITH_IMM)
return -EINVAL;
/* Check UD destination address PD */
if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
return -EINVAL;
} else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
return -EINVAL;
} else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
(wr->num_sge == 0 ||
wr->sg_list[0].length < sizeof(u64) ||
wr->sg_list[0].addr & (sizeof(u64) - 1))) {
return -EINVAL;
} else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
return -EINVAL;
}
next = qp->s_head + 1;
if (next >= qp->s_size)
next = 0;
if (next == qp->s_last)
return -ENOMEM;
rkt = &rdi->lkey_table;
pd = ibpd_to_rvtpd(qp->ibqp.pd);
wqe = rvt_get_swqe_ptr(qp, qp->s_head);
if (qp->ibqp.qp_type != IB_QPT_UC &&
qp->ibqp.qp_type != IB_QPT_RC)
memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
wr->opcode == IB_WR_RDMA_WRITE ||
wr->opcode == IB_WR_RDMA_READ)
memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
else
memcpy(&wqe->wr, wr, sizeof(wqe->wr));
wqe->length = 0;
j = 0;
if (wr->num_sge) {
acc = wr->opcode >= IB_WR_RDMA_READ ?
IB_ACCESS_LOCAL_WRITE : 0;
for (i = 0; i < wr->num_sge; i++) {
u32 length = wr->sg_list[i].length;
int ok;
if (length == 0)
continue;
ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
&wr->sg_list[i], acc);
if (!ok)
goto bail_inval_free;
wqe->length += length;
j++;
}
wqe->wr.num_sge = j;
}
if (qp->ibqp.qp_type == IB_QPT_UC ||
qp->ibqp.qp_type == IB_QPT_RC) {
if (wqe->length > 0x80000000U)
goto bail_inval_free;
} else {
atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
}
wqe->ssn = qp->s_ssn++;
qp->s_head = next;
return 0;
bail_inval_free:
/* release mr holds */
while (j) {
struct rvt_sge *sge = &wqe->sg_list[--j];
rvt_put_mr(sge->mr);
}
return -EINVAL;
}
/**
* rvt_post_send - post a send on a QP
* @ibqp: the QP to post the send on
@ -760,20 +893,46 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
/*
* VT-DRIVER-API: do_send()
* Driver needs to have a do_send() call which is a single entry point
* to take an already formed packet and throw it out on the wire. Once
* the packet is sent the driver needs to make an upcall to rvt so the
* completion queue can be notified and/or any other outstanding
* work/book keeping can be finished.
*
* Note that there should also be a way for rvt to protect itself
* against hangs in the driver layer. If a send doesn't actually
* complete in a timely manor rvt needs to return an error event.
*/
struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
unsigned long flags = 0;
int call_send;
unsigned nreq = 0;
int err = 0;
return -EOPNOTSUPP;
spin_lock_irqsave(&qp->s_lock, flags);
/*
* Ensure QP state is such that we can send. If not bail out early,
* there is no need to do this every time we post a send.
*/
if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
spin_unlock_irqrestore(&qp->s_lock, flags);
return -EINVAL;
}
/*
* If the send queue is empty, and we only have a single WR then just go
* ahead and kick the send engine into gear. Otherwise we will always
* just schedule the send to happen later.
*/
call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next;
for (; wr; wr = wr->next) {
err = rvt_post_one_wr(qp, wr);
if (unlikely(err)) {
*bad_wr = wr;
goto bail;
}
nreq++;
}
bail:
if (nreq && !call_send)
rdi->driver_f.schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
if (nreq && call_send)
rdi->driver_f.do_send(qp);
return err;
}
/**

View file

@ -231,6 +231,8 @@ struct rvt_driver_provided {
gfp_t gfp);
void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void (*notify_qp_reset)(struct rvt_qp *qp);
void (*schedule_send)(struct rvt_qp *qp);
void (*do_send)(struct rvt_qp *qp);
/*--------------------*/
/* Optional functions */
@ -312,6 +314,11 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq)
return container_of(ibsrq, struct rvt_srq, ibsrq);
}
static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct rvt_qp, ibqp);
}
static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
{
/*

View file

@ -129,6 +129,17 @@
/* Number of bits to pay attention to in the opcode for checking qp type */
#define RVT_OPCODE_QP_MASK 0xE0
/* Flags for checking QP state (see ib_rvt_state_ops[]) */
#define RVT_POST_SEND_OK 0x01
#define RVT_POST_RECV_OK 0x02
#define RVT_PROCESS_RECV_OK 0x04
#define RVT_PROCESS_SEND_OK 0x08
#define RVT_PROCESS_NEXT_SEND_OK 0x10
#define RVT_FLUSH_SEND 0x20
#define RVT_FLUSH_RECV 0x40
#define RVT_PROCESS_OR_FLUSH_SEND \
(RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND)
/*
* Send work request queue entry.
* The size of the sg_list is determined when the QP is created and stored
@ -373,4 +384,19 @@ struct rvt_qp_ibdev {
struct rvt_qpn_table qpn_table;
};
/*
* Since struct rvt_swqe is not a fixed size, we can't simply index into
* struct hfi1_qp.s_wq. This function does the array index computation.
*/
static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp,
unsigned n)
{
return (struct rvt_swqe *)((char *)qp->s_wq +
(sizeof(struct rvt_swqe) +
qp->s_max_sge *
sizeof(struct rvt_sge)) * n);
}
extern const int ib_rvt_state_ops[];
#endif /* DEF_RDMAVT_INCQP_H */