1
0
Fork 0

Merge branch 'tid-write' into hfi1-tid

Here is the final set of patches for TID RDMA. Again this is code which
was previously submitted but re-organized so as to be easier to review.

Similar to how the READ series was organized, the patches to build,
receive, allocate resources, etc. are broken out. For details on TID RDMA
as a whole, again refer to the original cover letter.

https://www.spinics.net/lists/linux-rdma/msg66611.html

* tid-write: (23 commits)
  IB/hfi1: Prioritize the sending of ACK packets
  IB/hfi1: Add static trace for TID RDMA WRITE protocol
  IB/hfi1: Enable TID RDMA WRITE protocol
  IB/hfi1: Add interlock between TID RDMA WRITE and other requests
  IB/hfi1: Add TID RDMA WRITE functionality into RDMA verbs
  IB/hfi1: Add the dual leg code
  IB/hfi1: Add the TID second leg ACK packet builder
  IB/hfi1: Add the TID second leg send packet builder
  IB/hfi1: Resend the TID RDMA WRITE DATA packets
  IB/hfi1: Add a function to receive TID RDMA RESYNC packet
  IB/hfi1: Add a function to build TID RDMA RESYNC packet
  IB/hfi1: Add TID RDMA retry timer
  IB/hfi1: Add a function to receive TID RDMA ACK packet
  IB/hfi1: Add a function to build TID RDMA ACK packet
  IB/hfi1: Add a function to receive TID RDMA WRITE DATA packet
  IB/hfi1: Add a function to build TID RDMA WRITE DATA packet
  IB/hfi1: Add a function to receive TID RDMA WRITE response
  IB/hfi1: Add TID resource timer
  IB/hfi1: Add a function to build TID RDMA WRITE response
  IB/hfi1: Add functions to receive TID RDMA WRITE request
  ...

Signed-off-by: Doug Ledford <dledford@redhat.com>
hifive-unleashed-5.1
Doug Ledford 2019-02-05 18:08:52 -05:00
commit 885c5807fa
27 changed files with 4025 additions and 131 deletions

View File

@ -1512,6 +1512,7 @@ static int __init hfi1_mod_init(void)
goto bail_dev;
}
hfi1_compute_tid_rdma_flow_wt();
/*
* These must be called before the driver is registered with
* the PCI subsystem.

View File

@ -6,6 +6,9 @@
#include "iowait.h"
#include "trace_iowait.h"
/* 1 priority == 16 starve_cnt */
#define IOWAIT_PRIORITY_STARVE_SHIFT 4
void iowait_set_flag(struct iowait *wait, u32 flag)
{
trace_hfi1_iowait_set(wait, flag);
@ -44,7 +47,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
uint seq,
bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait))
void (*sdma_drained)(struct iowait *wait),
void (*init_priority)(struct iowait *wait))
{
int i;
@ -58,6 +62,7 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
wait->sleep = sleep;
wait->wakeup = wakeup;
wait->sdma_drained = sdma_drained;
wait->init_priority = init_priority;
wait->flags = 0;
for (i = 0; i < IOWAIT_SES; i++) {
wait->wait[i].iow = wait;
@ -92,3 +97,30 @@ int iowait_set_work_flag(struct iowait_work *w)
iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
return IOWAIT_TID_SE;
}
/**
 * iowait_priority_update_top - update the top priority entry
 * @w: the iowait struct
 * @top: a pointer to the top priority entry
 * @idx: the index of the current iowait in an array
 * @top_idx: the array index for the iowait entry that has the top priority
 *
 * This function is called to compare the priority of a given
 * iowait with the given top priority entry. Each entry is weighted as
 * (priority << IOWAIT_PRIORITY_STARVE_SHIFT) + starved_cnt, i.e. one
 * unit of priority is worth 16 units of starved_cnt.
 *
 * Return: @idx if @w has a strictly larger weight than @top, otherwise
 * @top_idx (a tie keeps the current top entry).
 */
uint iowait_priority_update_top(struct iowait *w,
				struct iowait *top,
				uint idx, uint top_idx)
{
	uint cnt, tcnt;

	/*
	 * Convert priority into starve_cnt and compare the total.
	 *
	 * The totals are kept in a uint on purpose: priority and
	 * starved_cnt are both u8, so the weighted sum can exceed 255.
	 * Storing it in a u8 would wrap the value and could make a
	 * heavily starved or high-priority entry look like the lowest one.
	 */
	cnt = ((uint)w->priority << IOWAIT_PRIORITY_STARVE_SHIFT) +
	      w->starved_cnt;
	tcnt = ((uint)top->priority << IOWAIT_PRIORITY_STARVE_SHIFT) +
	       top->starved_cnt;

	return cnt > tcnt ? idx : top_idx;
}

View File

@ -100,6 +100,7 @@ struct iowait_work {
* @sleep: no space callback
* @wakeup: space callback wakeup
* @sdma_drained: sdma count drained
* @init_priority: callback to manipulate priority
* @lock: lock protected head of wait queue
* @iowork: workqueue overhead
* @wait_dma: wait for sdma_busy == 0
@ -109,7 +110,7 @@ struct iowait_work {
* @tx_limit: limit for overflow queuing
* @tx_count: number of tx entry's in tx_head'ed list
* @flags: wait flags (one per QP)
* @wait: SE array
* @wait: SE array for multiple legs
*
* This is to be embedded in user's state structure
* (QP or PQ).
@ -120,10 +121,13 @@ struct iowait_work {
* are callbacks for the ULP to implement
* what ever queuing/dequeuing of
* the embedded iowait and its containing struct
* when a resource shortage like SDMA ring space is seen.
* when a resource shortage like SDMA ring space
* or PIO credit space is seen.
*
* Both potentially have locks held
* so sleeping is not allowed.
* so sleeping is not allowed and it is not
* supported to submit txreqs from the wakeup
* call directly because of lock conflicts.
*
* The wait_dma member along with the iow
*
@ -143,6 +147,7 @@ struct iowait {
);
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
void (*init_priority)(struct iowait *wait);
seqlock_t *lock;
wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio;
@ -152,6 +157,7 @@ struct iowait {
u32 tx_limit;
u32 tx_count;
u8 starved_cnt;
u8 priority;
unsigned long flags;
struct iowait_work wait[IOWAIT_SES];
};
@ -171,7 +177,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
uint seq,
bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait));
void (*sdma_drained)(struct iowait *wait),
void (*init_priority)(struct iowait *wait));
/**
* iowait_schedule() - schedule the default send engine work
@ -185,6 +192,18 @@ static inline bool iowait_schedule(struct iowait *wait,
return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
}
/**
* iowait_tid_schedule - schedule the tid SE
* @wait: the iowait structure
* @wq: the work queue
* @cpu: the cpu
*/
static inline bool iowait_tid_schedule(struct iowait *wait,
struct workqueue_struct *wq, int cpu)
{
return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_TID_SE].iowork);
}
/**
* iowait_sdma_drain() - wait for DMAs to drain
*
@ -327,6 +346,8 @@ static inline u16 iowait_get_desc(struct iowait_work *w)
tx = list_first_entry(&w->tx_head, struct sdma_txreq,
list);
num_desc = tx->num_desc;
if (tx->flags & SDMA_TXREQ_F_VIP)
w->iow->priority++;
}
return num_desc;
}
@ -340,6 +361,37 @@ static inline u32 iowait_get_all_desc(struct iowait *w)
return num_desc;
}
/**
 * iowait_update_priority - raise the priority for a VIP head txreq
 * @w: the send engine work whose tx list is examined
 *
 * If the tx list of @w is not empty and its first txreq carries
 * SDMA_TXREQ_F_VIP, the priority of the owning iowait is incremented
 * by one. Only the first entry of the list is looked at.
 */
static inline void iowait_update_priority(struct iowait_work *w)
{
	struct sdma_txreq *head_tx;

	/* Nothing queued on this send engine, priority is unchanged. */
	if (list_empty(&w->tx_head))
		return;

	head_tx = list_first_entry(&w->tx_head, struct sdma_txreq, list);
	if (head_tx->flags & SDMA_TXREQ_F_VIP)
		w->iow->priority++;
}
/**
 * iowait_update_all_priority - apply the VIP priority update to both SEs
 * @w: the iowait structure
 *
 * Runs iowait_update_priority() on the IOWAIT_IB_SE entry first and on
 * the IOWAIT_TID_SE entry second.
 */
static inline void iowait_update_all_priority(struct iowait *w)
{
	struct iowait_work *ib_se = &w->wait[IOWAIT_IB_SE];
	struct iowait_work *tid_se = &w->wait[IOWAIT_TID_SE];

	iowait_update_priority(ib_se);
	iowait_update_priority(tid_se);
}
/**
 * iowait_init_priority - reset the priority and let the owner seed it
 * @w: the iowait structure
 *
 * The priority is cleared to zero and then, when the owner registered
 * an init_priority callback, that callback is invoked with @w.
 */
static inline void iowait_init_priority(struct iowait *w)
{
	void (*seed)(struct iowait *wait) = w->init_priority;

	w->priority = 0;
	if (seed)
		seed(w);
}
static inline void iowait_get_priority(struct iowait *w)
{
iowait_init_priority(w);
iowait_update_all_priority(w);
}
/**
* iowait_queue - Put the iowait on a wait queue
* @pkts_sent: have some packets been sent before queuing?
@ -356,14 +408,18 @@ static inline void iowait_queue(bool pkts_sent, struct iowait *w,
/*
* To play fair, insert the iowait at the tail of the wait queue if it
* has already sent some packets; Otherwise, put it at the head.
* However, if it has priority packets to send, also put it at the
* head.
*/
if (pkts_sent) {
list_add_tail(&w->list, wait_head);
if (pkts_sent)
w->starved_cnt = 0;
} else {
list_add(&w->list, wait_head);
else
w->starved_cnt++;
}
if (w->priority > 0 || !pkts_sent)
list_add(&w->list, wait_head);
else
list_add_tail(&w->list, wait_head);
}
/**
@ -380,27 +436,10 @@ static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w)
w->starved_cnt = 0;
}
/**
* iowait_starve_find_max - Find the maximum of the starve count
* @w: the iowait struct
* @max: a variable containing the max starve count
* @idx: the index of the current iowait in an array
* @max_idx: a variable containing the array index for the
* iowait entry that has the max starve count
*
* This function is called to compare the starve count of a
* given iowait with the given max starve count. The max starve
* count and the index will be updated if the iowait's start
* count is larger.
*/
static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
uint idx, uint *max_idx)
{
if (w->starved_cnt > *max) {
*max = w->starved_cnt;
*max_idx = idx;
}
}
/* Update the top priority index */
uint iowait_priority_update_top(struct iowait *w,
struct iowait *top,
uint idx, uint top_idx);
/**
* iowait_packet_queued() - determine if a packet is queued

View File

@ -245,10 +245,15 @@ void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
struct hfi1_qp_priv *priv = qp->priv;
unsigned long flags;
if (attr_mask & IB_QP_RETRY_CNT)
priv->s_retry = attr->retry_cnt;
spin_lock_irqsave(&priv->opfn.lock, flags);
if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
struct tid_rdma_params *local = &priv->tid_rdma.local;
if (attr_mask & IB_QP_TIMEOUT)
priv->tid_retry_timeout_jiffies = qp->timeout_jiffies;
if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) ||
qp->pmtu == enum_to_mtu(OPA_MTU_8192)) {
tid_rdma_opfn_init(qp, local);

View File

@ -1599,8 +1599,7 @@ static void sc_piobufavail(struct send_context *sc)
struct rvt_qp *qp;
struct hfi1_qp_priv *priv;
unsigned long flags;
uint i, n = 0, max_idx = 0;
u8 max_starved_cnt = 0;
uint i, n = 0, top_idx = 0;
if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
dd->send_contexts[sc->sw_index].type != SC_VL15)
@ -1619,11 +1618,18 @@ static void sc_piobufavail(struct send_context *sc)
if (n == ARRAY_SIZE(qps))
break;
wait = list_first_entry(list, struct iowait, list);
iowait_get_priority(wait);
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx);
if (n) {
priv = qps[top_idx]->priv;
top_idx = iowait_priority_update_top(wait,
&priv->s_iowait,
n, top_idx);
}
/* refcount held until actual wake up */
qps[n++] = qp;
}
@ -1638,12 +1644,12 @@ static void sc_piobufavail(struct send_context *sc)
}
write_sequnlock_irqrestore(&sc->waitlock, flags);
/* Wake up the most starved one first */
/* Wake up the top-priority one first */
if (n)
hfi1_qp_wakeup(qps[max_idx],
hfi1_qp_wakeup(qps[top_idx],
RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
for (i = 0; i < n; i++)
if (i != max_idx)
if (i != top_idx)
hfi1_qp_wakeup(qps[i],
RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
}

View File

@ -138,6 +138,12 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
.flags = RVT_OPERATION_USE_RESERVE,
},
[IB_WR_TID_RDMA_WRITE] = {
.length = sizeof(struct ib_rdma_wr),
.qpt_support = BIT(IB_QPT_RC),
.flags = RVT_OPERATION_IGN_RNR_CNT,
},
};
static void flush_list_head(struct list_head *l)
@ -431,6 +437,11 @@ static void hfi1_qp_schedule(struct rvt_qp *qp)
if (ret)
iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
}
if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_TID)) {
ret = hfi1_schedule_tid_send(qp);
if (ret)
iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
}
}
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
@ -450,8 +461,27 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
{
if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
struct hfi1_qp_priv *priv = qp->priv;
if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) {
qp->s_flags &= ~RVT_S_BUSY;
/*
* If we are sending a first-leg packet from the second leg,
* we need to clear the busy flag from priv->s_flags to
* avoid a race condition when the qp wakes up before
* the call to hfi1_verbs_send() returns to the second
* leg. In that case, the second leg will terminate without
* being re-scheduled, resulting in failure to send TID RDMA
* WRITE DATA and TID RDMA ACK packets.
*/
if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
priv->s_flags &= ~(HFI1_S_TID_BUSY_SET |
RVT_S_BUSY);
iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
}
} else {
priv->s_flags &= ~RVT_S_BUSY;
}
}
static int iowait_sleep(
@ -488,6 +518,7 @@ static int iowait_sleep(
ibp->rvp.n_dmawait++;
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
iowait_get_priority(&priv->s_iowait);
iowait_queue(pkts_sent, &priv->s_iowait,
&sde->dmawait);
priv->s_iowait.lock = &sde->waitlock;
@ -537,6 +568,17 @@ static void iowait_sdma_drained(struct iowait *wait)
spin_unlock_irqrestore(&qp->s_lock, flags);
}
static void hfi1_init_priority(struct iowait *w)
{
struct rvt_qp *qp = iowait_to_qp(w);
struct hfi1_qp_priv *priv = qp->priv;
if (qp->s_flags & RVT_S_ACK_PENDING)
w->priority++;
if (priv->s_flags & RVT_S_ACK_PENDING)
w->priority++;
}
/**
* qp_to_sdma_engine - map a qp to a send engine
* @qp: the QP
@ -694,10 +736,11 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
&priv->s_iowait,
1,
_hfi1_do_send,
NULL,
_hfi1_do_tid_send,
iowait_sleep,
iowait_wakeup,
iowait_sdma_drained);
iowait_sdma_drained,
hfi1_init_priority);
return priv;
}
@ -755,6 +798,8 @@ void quiesce_qp(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
hfi1_del_tid_reap_timer(qp);
hfi1_del_tid_retry_timer(qp);
iowait_sdma_drain(&priv->s_iowait);
qp_pio_drain(qp);
flush_tx_list(qp);
@ -850,7 +895,8 @@ void notify_error_qp(struct rvt_qp *qp)
if (lock) {
write_seqlock(lock);
if (!list_empty(&priv->s_iowait.list) &&
!(qp->s_flags & RVT_S_BUSY)) {
!(qp->s_flags & RVT_S_BUSY) &&
!(priv->s_flags & RVT_S_BUSY)) {
qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
@ -859,7 +905,8 @@ void notify_error_qp(struct rvt_qp *qp)
write_sequnlock(lock);
}
if (!(qp->s_flags & RVT_S_BUSY)) {
if (!(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) {
qp->s_hdrwords = 0;
if (qp->s_rdma_mr) {
rvt_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;

View File

@ -64,12 +64,16 @@ extern const struct rvt_operation_params hfi1_post_parms[];
* HFI1_S_AHG_CLEAR - have send engine clear ahg state
* HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
* HFI1_S_WAIT_TID_SPACE - a QP is waiting for TID resource
* HFI1_S_WAIT_TID_RESP - waiting for a TID RDMA WRITE response
* HFI1_S_WAIT_HALT - halt the first leg send engine
* HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
*/
#define HFI1_S_AHG_VALID 0x80000000
#define HFI1_S_AHG_CLEAR 0x40000000
#define HFI1_S_WAIT_PIO_DRAIN 0x20000000
#define HFI1_S_WAIT_TID_SPACE 0x10000000
#define HFI1_S_WAIT_TID_RESP 0x08000000
#define HFI1_S_WAIT_HALT 0x04000000
#define HFI1_S_MIN_BIT_MASK 0x01000000
/*
@ -78,6 +82,7 @@ extern const struct rvt_operation_params hfi1_post_parms[];
#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
#define HFI1_S_ANY_TID_WAIT_SEND (RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA)
/*
* Send if not busy or waiting for I/O and either

View File

@ -111,15 +111,17 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
struct hfi1_pkt_state *ps)
{
struct rvt_ack_entry *e;
u32 hwords;
u32 hwords, hdrlen;
u32 len = 0;
u32 bth0 = 0, bth2 = 0;
u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
int middle = 0;
u32 pmtu = qp->pmtu;
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_qp_priv *qpriv = qp->priv;
bool last_pkt;
u32 delta;
u8 next = qp->s_tail_ack_queue;
struct tid_rdma_request *req;
trace_hfi1_rsp_make_rc_ack(qp, 0);
lockdep_assert_held(&qp->s_lock);
@ -127,7 +129,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
goto bail;
if (priv->hdr_type == HFI1_PKT_TYPE_9B)
if (qpriv->hdr_type == HFI1_PKT_TYPE_9B)
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
else
@ -149,9 +151,18 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
* response has been sent instead of only being
* constructed.
*/
if (++qp->s_tail_ack_queue >
rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
qp->s_tail_ack_queue = 0;
if (++next > rvt_size_atomic(&dev->rdi))
next = 0;
/*
* Only advance the s_acked_ack_queue pointer if there
* have been no TID RDMA requests.
*/
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
if (e->opcode != TID_OP(WRITE_REQ) &&
qp->s_acked_ack_queue == qp->s_tail_ack_queue)
qp->s_acked_ack_queue = next;
qp->s_tail_ack_queue = next;
trace_hfi1_rsp_make_rc_ack(qp, e->psn);
/* FALLTHROUGH */
case OP(SEND_ONLY):
case OP(ACKNOWLEDGE):
@ -163,6 +174,12 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
}
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
/* Check for tid write fence */
if ((qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK) ||
hfi1_tid_rdma_ack_interlock(qp, e)) {
iowait_set_flag(&qpriv->s_iowait, IOWAIT_PENDING_IB);
goto bail;
}
if (e->opcode == OP(RDMA_READ_REQUEST)) {
/*
* If a RDMA read response is being resent and
@ -172,6 +189,10 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
*/
len = e->rdma_sge.sge_length;
if (len && !e->rdma_sge.mr) {
if (qp->s_acked_ack_queue ==
qp->s_tail_ack_queue)
qp->s_acked_ack_queue =
qp->r_head_ack_queue;
qp->s_tail_ack_queue = qp->r_head_ack_queue;
goto bail;
}
@ -193,6 +214,21 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
hwords++;
qp->s_ack_rdma_psn = e->psn;
bth2 = mask_psn(qp->s_ack_rdma_psn++);
} else if (e->opcode == TID_OP(WRITE_REQ)) {
/*
* If a TID RDMA WRITE RESP is being resent, we have to
* wait for the actual request. All requests that are to
* be resent will have their state set to
* TID_REQUEST_RESEND. When the new request arrives, the
* state will be changed to TID_REQUEST_RESEND_ACTIVE.
*/
req = ack_to_tid_req(e);
if (req->state == TID_REQUEST_RESEND ||
req->state == TID_REQUEST_INIT_RESEND)
goto bail;
qp->s_ack_state = TID_OP(WRITE_RESP);
qp->s_ack_rdma_psn = mask_psn(e->psn + req->cur_seg);
goto write_resp;
} else if (e->opcode == TID_OP(READ_REQ)) {
/*
* If a TID RDMA read response is being resent and
@ -202,6 +238,10 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
*/
len = e->rdma_sge.sge_length;
if (len && !e->rdma_sge.mr) {
if (qp->s_acked_ack_queue ==
qp->s_tail_ack_queue)
qp->s_acked_ack_queue =
qp->r_head_ack_queue;
qp->s_tail_ack_queue = qp->r_head_ack_queue;
goto bail;
}
@ -224,6 +264,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
bth2 = mask_psn(e->psn);
e->sent = 1;
}
trace_hfi1_tid_write_rsp_make_rc_ack(qp);
bth0 = qp->s_ack_state << 24;
break;
@ -250,6 +291,61 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
bth2 = mask_psn(qp->s_ack_rdma_psn++);
break;
case TID_OP(WRITE_RESP):
write_resp:
/*
* 1. Check if RVT_S_ACK_PENDING is set. If yes,
* goto normal.
* 2. Attempt to allocate TID resources.
* 3. Remove RVT_S_RESP_PENDING flags from s_flags
* 4. If resources not available:
* 4.1 Set RVT_S_WAIT_TID_SPACE
* 4.2 Queue QP on RCD TID queue
* 4.3 Put QP on iowait list.
* 4.4 Build IB RNR NAK with appropriate timeout value
* 4.5 Return indication progress made.
* 5. If resources are available:
* 5.1 Program HW flow CSRs
* 5.2 Build TID RDMA WRITE RESP packet
* 5.3 If more resources needed, do 2.1 - 2.3.
* 5.4 Wake up next QP on RCD TID queue.
* 5.5 Return indication progress made.
*/
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
req = ack_to_tid_req(e);
/*
* Send scheduled RNR NAK's. RNR NAK's need to be sent at
* segment boundaries, not at request boundaries. Don't change
* s_ack_state because we are still in the middle of a request
*/
if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND &&
qp->s_tail_ack_queue == qpriv->r_tid_alloc &&
req->cur_seg == req->alloc_seg) {
qpriv->rnr_nak_state = TID_RNR_NAK_SENT;
goto normal_no_state;
}
bth2 = mask_psn(qp->s_ack_rdma_psn);
hdrlen = hfi1_build_tid_rdma_write_resp(qp, e, ohdr, &bth1,
bth2, &len,
&ps->s_txreq->ss);
if (!hdrlen)
return 0;
hwords += hdrlen;
bth0 = qp->s_ack_state << 24;
qp->s_ack_rdma_psn++;
trace_hfi1_tid_req_make_rc_ack_write(qp, 0, e->opcode, e->psn,
e->lpsn, req);
if (req->cur_seg != req->total_segs)
break;
e->sent = 1;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
break;
case TID_OP(READ_RESP):
read_resp:
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
@ -281,8 +377,7 @@ normal:
* (see above).
*/
qp->s_ack_state = OP(SEND_ONLY);
qp->s_flags &= ~RVT_S_ACK_PENDING;
ps->s_txreq->ss = NULL;
normal_no_state:
if (qp->s_nak_state)
ohdr->u.aeth =
cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
@ -294,9 +389,12 @@ normal:
len = 0;
bth0 = OP(ACKNOWLEDGE) << 24;
bth2 = mask_psn(qp->s_ack_psn);
qp->s_flags &= ~RVT_S_ACK_PENDING;
ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
ps->s_txreq->ss = NULL;
}
qp->s_rdma_ack_cnt++;
ps->s_txreq->sde = priv->s_sde;
ps->s_txreq->sde = qpriv->s_sde;
ps->s_txreq->s_cur_size = len;
ps->s_txreq->hdr_dwords = hwords;
hfi1_make_ruc_header(qp, ohdr, bth0, bth1, bth2, middle, ps);
@ -349,6 +447,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
int middle = 0;
int delta;
struct tid_rdma_flow *flow = NULL;
struct tid_rdma_params *remote;
trace_hfi1_sender_make_rc_req(qp);
lockdep_assert_held(&qp->s_lock);
@ -397,7 +496,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto done_free_tx;
}
if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK | HFI1_S_WAIT_HALT))
goto bail;
if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
@ -569,6 +668,113 @@ no_flow_control:
qp->s_cur = 0;
break;
case IB_WR_TID_RDMA_WRITE:
if (newreq) {
/*
* Limit the number of TID RDMA WRITE requests.
*/
if (atomic_read(&priv->n_tid_requests) >=
HFI1_TID_RDMA_WRITE_CNT)
goto bail;
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
}
hwords += hfi1_build_tid_rdma_write_req(qp, wqe, ohdr,
&bth1, &bth2,
&len);
ss = NULL;
if (priv->s_tid_cur == HFI1_QP_WQE_INVALID) {
priv->s_tid_cur = qp->s_cur;
if (priv->s_tid_tail == HFI1_QP_WQE_INVALID) {
priv->s_tid_tail = qp->s_cur;
priv->s_state = TID_OP(WRITE_RESP);
}
} else if (priv->s_tid_cur == priv->s_tid_head) {
struct rvt_swqe *__w;
struct tid_rdma_request *__r;
__w = rvt_get_swqe_ptr(qp, priv->s_tid_cur);
__r = wqe_to_tid_req(__w);
/*
* The s_tid_cur pointer is advanced to s_cur if
* any of the following conditions about the WQE
* to which s_tid_cur currently points are
* satisfied:
* 1. The request is not a TID RDMA WRITE
* request,
* 2. The request is in the INACTIVE or
* COMPLETE states (TID RDMA READ requests
* stay at INACTIVE and TID RDMA WRITE
* transition to COMPLETE when done),
* 3. The request is in the ACTIVE or SYNC
* state and the number of completed
* segments is equal to the total segment
* count.
* (If ACTIVE, the request is waiting for
* ACKs. If SYNC, the request has not
* received any responses because it's
* waiting on a sync point.)
*/
if (__w->wr.opcode != IB_WR_TID_RDMA_WRITE ||
__r->state == TID_REQUEST_INACTIVE ||
__r->state == TID_REQUEST_COMPLETE ||
((__r->state == TID_REQUEST_ACTIVE ||
__r->state == TID_REQUEST_SYNC) &&
__r->comp_seg == __r->total_segs)) {
if (priv->s_tid_tail ==
priv->s_tid_cur &&
priv->s_state ==
TID_OP(WRITE_DATA_LAST)) {
priv->s_tid_tail = qp->s_cur;
priv->s_state =
TID_OP(WRITE_RESP);
}
priv->s_tid_cur = qp->s_cur;
}
/*
* A corner case: when the last TID RDMA WRITE
* request was completed, s_tid_head,
* s_tid_cur, and s_tid_tail all point to the
* same location. Other requests are posted and
* s_cur wraps around to the same location,
* where a new TID RDMA WRITE is posted. In
* this case, none of the indices need to be
* updated. However, the priv->s_state should.
*/
if (priv->s_tid_tail == qp->s_cur &&
priv->s_state == TID_OP(WRITE_DATA_LAST))
priv->s_state = TID_OP(WRITE_RESP);
}
req = wqe_to_tid_req(wqe);
if (newreq) {
priv->s_tid_head = qp->s_cur;
priv->pending_tid_w_resp += req->total_segs;
atomic_inc(&priv->n_tid_requests);
atomic_dec(&priv->n_requests);
} else {
req->state = TID_REQUEST_RESEND;
req->comp_seg = delta_psn(bth2, wqe->psn);
/*
* Pull back any segments since we are going
* to re-receive them.
*/
req->setup_head = req->clear_tail;
priv->pending_tid_w_resp +=
delta_psn(wqe->lpsn, bth2) + 1;
}
trace_hfi1_tid_write_sender_make_req(qp, newreq);
trace_hfi1_tid_req_make_req_write(qp, newreq,
wqe->wr.opcode,
wqe->psn, wqe->lpsn,
req);
if (++qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
case IB_WR_RDMA_READ:
/*
* Don't allow more operations to be started
@ -728,7 +934,8 @@ no_flow_control:
if (qp->s_tail >= qp->s_size)
qp->s_tail = 0;
}
if (wqe->wr.opcode == IB_WR_RDMA_READ)
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
qp->s_psn = wqe->lpsn + 1;
else if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
qp->s_psn = req->s_next_psn;
@ -848,6 +1055,35 @@ no_flow_control:
if (qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
case TID_OP(WRITE_RESP):
/*
* This value for s_state is used for restarting a TID RDMA
* WRITE request. See the comment in OP(RDMA_READ_RESPONSE_MIDDLE)
* for more.
*/
req = wqe_to_tid_req(wqe);
req->state = TID_REQUEST_RESEND;
rcu_read_lock();
remote = rcu_dereference(priv->tid_rdma.remote);
req->comp_seg = delta_psn(qp->s_psn, wqe->psn);
len = wqe->length - (req->comp_seg * remote->max_len);
rcu_read_unlock();
bth2 = mask_psn(qp->s_psn);
hwords += hfi1_build_tid_rdma_write_req(qp, wqe, ohdr, &bth1,
&bth2, &len);
qp->s_psn = wqe->lpsn + 1;
ss = NULL;
qp->s_state = TID_OP(WRITE_REQ);
priv->pending_tid_w_resp += delta_psn(wqe->lpsn, bth2) + 1;
priv->s_tid_cur = qp->s_cur;
if (++qp->s_cur == qp->s_size)
qp->s_cur = 0;
trace_hfi1_tid_req_make_req_write(qp, 0, wqe->wr.opcode,
wqe->psn, wqe->lpsn, req);
break;
case TID_OP(READ_RESP):
if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
goto bail;
@ -948,7 +1184,8 @@ no_flow_control:
}
qp->s_sending_hpsn = bth2;
delta = delta_psn(bth2, wqe->psn);
if (delta && delta % HFI1_PSN_CREDIT == 0)
if (delta && delta % HFI1_PSN_CREDIT == 0 &&
wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
bth2 |= IB_BTH_REQ_ACK;
if (qp->s_flags & RVT_S_SEND_ONE) {
qp->s_flags &= ~RVT_S_SEND_ONE;
@ -981,6 +1218,12 @@ bail:
bail_no_tx:
ps->s_txreq = NULL;
qp->s_flags &= ~RVT_S_BUSY;
/*
* If we didn't get a txreq, the QP will be woken up later to try
* again. Set the flags to indicate which work item to wake
* up.
*/
iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
return 0;
}
@ -1268,6 +1511,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
lockdep_assert_held(&qp->s_lock);
qp->s_cur = n;
priv->pending_tid_r_segs = 0;
priv->pending_tid_w_resp = 0;
qp->s_num_rd_atomic = 0;
/*
@ -1325,6 +1569,10 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
break;
case IB_WR_TID_RDMA_WRITE:
qp->s_state = TID_OP(WRITE_RESP);
break;
case IB_WR_RDMA_READ:
qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
break;
@ -1389,6 +1637,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
wqe = do_rc_completion(qp, wqe, ibp);
qp->s_flags &= ~RVT_S_WAIT_ACK;
} else {
trace_hfi1_tid_write_sender_restart_rc(qp, 0);
if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
struct tid_rdma_request *req;
@ -1418,7 +1667,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
RVT_S_WAIT_ACK);
RVT_S_WAIT_ACK | HFI1_S_WAIT_TID_RESP);
if (wait)
qp->s_flags |= RVT_S_SEND_ONE;
reset_psn(qp, psn);
@ -1426,7 +1675,8 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
/*
* Set qp->s_sending_psn to the next PSN after the given one.
* This would be psn+1 except when RDMA reads are present.
* This would be psn+1 except when RDMA reads or TID RDMA ops
* are present.
*/
static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
{
@ -1439,7 +1689,8 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
wqe = rvt_get_swqe_ptr(qp, n);
if (cmp_psn(psn, wqe->lpsn) <= 0) {
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
wqe->wr.opcode == IB_WR_TID_RDMA_READ)
wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
qp->s_sending_psn = wqe->lpsn + 1;
else
qp->s_sending_psn = psn + 1;
@ -1462,8 +1713,9 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
struct rvt_swqe *wqe;
struct ib_header *hdr = NULL;
struct hfi1_16b_header *hdr_16b = NULL;
u32 opcode;
u32 opcode, head, tail;
u32 psn;
struct tid_rdma_request *req;
lockdep_assert_held(&qp->s_lock);
if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK))
@ -1490,29 +1742,84 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
opcode = ib_bth_get_opcode(ohdr);
if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) ||
opcode == TID_OP(READ_RESP)) {
opcode == TID_OP(READ_RESP) ||
opcode == TID_OP(WRITE_RESP)) {
WARN_ON(!qp->s_rdma_ack_cnt);
qp->s_rdma_ack_cnt--;
return;
}
psn = ib_bth_get_psn(ohdr);
reset_sending_psn(qp, psn);
/*
* Don't attempt to reset the sending PSN for packets in the
* KDETH PSN space since the PSN does not match anything.
*/
if (opcode != TID_OP(WRITE_DATA) &&
opcode != TID_OP(WRITE_DATA_LAST) &&
opcode != TID_OP(ACK) && opcode != TID_OP(RESYNC))
reset_sending_psn(qp, psn);
/* Handle TID RDMA WRITE packets differently */
if (opcode >= TID_OP(WRITE_REQ) &&
opcode <= TID_OP(WRITE_DATA_LAST)) {
head = priv->s_tid_head;
tail = priv->s_tid_cur;
/*
* s_tid_cur is set to s_tid_head in the case, where
* a new TID RDMA request is being started and all
* previous ones have been completed.
* Therefore, we need to do a secondary check in order
* to properly determine whether we should start the
* RC timer.
*/
wqe = rvt_get_swqe_ptr(qp, tail);
req = wqe_to_tid_req(wqe);
if (head == tail && req->comp_seg < req->total_segs) {
if (tail == 0)
tail = qp->s_size - 1;
else
tail -= 1;
}
} else {
head = qp->s_tail;
tail = qp->s_acked;
}
/*
* Start timer after a packet requesting an ACK has been sent and
* there are still requests that haven't been acked.
*/
if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
if ((psn & IB_BTH_REQ_ACK) && tail != head &&
opcode != TID_OP(WRITE_DATA) && opcode != TID_OP(WRITE_DATA_LAST) &&
opcode != TID_OP(RESYNC) &&
!(qp->s_flags &
(RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
(RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
if (opcode == TID_OP(READ_REQ))
rvt_add_retry_timer_ext(qp, priv->timeout_shift);
else
rvt_add_retry_timer(qp);
}
/* Start TID RDMA ACK timer */
if ((opcode == TID_OP(WRITE_DATA) ||
opcode == TID_OP(WRITE_DATA_LAST) ||
opcode == TID_OP(RESYNC)) &&
(psn & IB_BTH_REQ_ACK) &&
!(priv->s_flags & HFI1_S_TID_RETRY_TIMER) &&
(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
/*
* The TID RDMA ACK packet could be received before this
* function is called. Therefore, add the timer only if TID
* RDMA ACK packets are actually pending.
*/
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
req = wqe_to_tid_req(wqe);
if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
req->ack_seg < req->cur_seg)
hfi1_add_tid_retry_timer(qp);
}
while (qp->s_last != qp->s_acked) {
u32 s_last;
@ -1611,7 +1918,16 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
}
qp->s_retry = qp->s_retry_cnt;
update_last_psn(qp, wqe->lpsn);
/*
* Don't update the last PSN if the request being completed is
* a TID RDMA WRITE request.
* Completion of the TID RDMA WRITE requests is done by the
* TID RDMA ACKs and as such could be for a request that has
* already been ACKed as far as the IB state machine is
* concerned.
*/
if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
update_last_psn(qp, wqe->lpsn);
/*
* If we are completing a request which is in the process of
@ -1641,6 +1957,54 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
return wqe;
}
/*
 * set_restart_qp - schedule a retry of the QP's current request
 * @qp: the QP
 * @rcd: receive context whose qp_wait_list the QP may be queued on
 *
 * Does nothing when RVT_R_RDMAR_SEQ is already set in qp->r_flags.
 * Otherwise the flag is set, hfi1_restart_rc() is called starting at
 * s_last_psn + 1 and, if qp->rspwait is not already on a list, the QP
 * is marked RVT_R_RSP_SEND, rvt_get_qp() is called and the QP is
 * appended to rcd->qp_wait_list.
 */
static void set_restart_qp(struct rvt_qp *qp, struct hfi1_ctxtdata *rcd)
{
	if (qp->r_flags & RVT_R_RDMAR_SEQ)
		return;

	/* Retry this request. */
	qp->r_flags |= RVT_R_RDMAR_SEQ;
	hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);

	/* Already linked on a response wait list: nothing more to queue. */
	if (!list_empty(&qp->rspwait))
		return;

	qp->r_flags |= RVT_R_RSP_SEND;
	rvt_get_qp(qp);
	list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
/**
 * update_qp_retry_state - Update qp retry state.
 * @qp: the QP
 * @psn: the packet sequence number of the TID RDMA WRITE RESP.
 * @spsn: The start psn for the given TID RDMA WRITE swqe.
 * @lpsn: The last psn for the given TID RDMA WRITE swqe.
 *
 * Invoked when a TID WRITE RESP arrives after the qp has already been
 * scheduled to retry a request. s_psn is always moved to @psn + 1;
 * s_cur and s_state are adjusted only when @psn is at or past @lpsn,
 * or exactly equal to @spsn.
 */
static void update_qp_retry_state(struct rvt_qp *qp, u32 psn, u32 spsn,
				  u32 lpsn)
{
	struct hfi1_qp_priv *priv = qp->priv;

	qp->s_psn = psn + 1;

	/*
	 * Last TID RDMA WRITE RESP packet of the request: step s_cur to
	 * the entry after s_tid_cur (wrapping at s_size) and change the
	 * s_state so that the retry will be processed correctly.
	 */
	if (cmp_psn(psn, lpsn) >= 0) {
		qp->s_cur = priv->s_tid_cur + 1;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		qp->s_state = TID_OP(WRITE_REQ);
		return;
	}

	/*
	 * First TID RDMA WRITE RESP packet of the request: keep s_cur on
	 * the request itself and change the s_state accordingly.
	 */
	if (cmp_psn(psn, spsn) == 0) {
		qp->s_cur = priv->s_tid_cur;
		qp->s_state = TID_OP(WRITE_RESP);
	}
}
/**
* do_rc_ack - process an incoming RC ACK
* @qp: the QP the ACK came in on
@ -1662,6 +2026,7 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
int ret = 0;
u32 ack_psn;
int diff;
struct rvt_dev_info *rdi;
lockdep_assert_held(&qp->s_lock);
/*
@ -1708,18 +2073,10 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
(opcode != TID_OP(READ_RESP) || diff != 0)) ||
((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
(opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
/* Retry this request. */
if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
qp->r_flags |= RVT_R_RDMAR_SEQ;
hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
rvt_get_qp(qp);
list_add_tail(&qp->rspwait,
&rcd->qp_wait_list);
}
}
(opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0)) ||
(wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
(delta_psn(psn, qp->s_last_psn) != 1))) {
set_restart_qp(qp, rcd);
/*
* No need to process the ACK/NAK since we are
* restarting an earlier request.
@ -1751,6 +2108,14 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
hfi1_schedule_send(qp);
}
}
/*
* TID RDMA WRITE requests will be completed by the TID RDMA
* ACK packet handler (see tid_rdma.c).
*/
if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
break;
wqe = do_rc_completion(qp, wqe, ibp);
if (qp->s_acked == qp->s_tail)
break;
@ -1768,17 +2133,60 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
else
rvt_stop_rc_timers(qp);
} else if (qp->s_acked != qp->s_tail) {
struct rvt_swqe *__w = NULL;
if (qpriv->s_tid_cur != HFI1_QP_WQE_INVALID)
__w = rvt_get_swqe_ptr(qp, qpriv->s_tid_cur);
/*
* We are expecting more ACKs so
* mod the retry timer.
* Stop timers if we've received all of the TID RDMA
* WRITE responses.
*/
rvt_mod_retry_timer(qp);
/*
* We can stop re-sending the earlier packets and
* continue with the next packet the receiver wants.
*/
if (cmp_psn(qp->s_psn, psn) <= 0)
reset_psn(qp, psn + 1);
if (__w && __w->wr.opcode == IB_WR_TID_RDMA_WRITE &&
opcode == TID_OP(WRITE_RESP)) {
/*
* Normally, the loop above would correctly
* process all WQEs from s_acked onward and
* either complete them or check for correct
* PSN sequencing.
* However, for TID RDMA, due to pipelining,
* the response may not be for the request at
* s_acked so the above loop would just be
* skipped. This does not allow for checking
* the PSN sequencing. It has to be done
* separately.
*/
if (cmp_psn(psn, qp->s_last_psn + 1)) {
set_restart_qp(qp, rcd);
goto bail_stop;
}
/*
* If the psn is being resent, stop the
* resending.
*/
if (qp->s_cur != qp->s_tail &&
cmp_psn(qp->s_psn, psn) <= 0)
update_qp_retry_state(qp, psn,
__w->psn,
__w->lpsn);
else if (--qpriv->pending_tid_w_resp)
rvt_mod_retry_timer(qp);
else
rvt_stop_rc_timers(qp);
} else {
/*
* We are expecting more ACKs so
* mod the retry timer.
*/
rvt_mod_retry_timer(qp);
/*
* We can stop re-sending the earlier packets
* and continue with the next packet the
* receiver wants.
*/
if (cmp_psn(qp->s_psn, psn) <= 0)
reset_psn(qp, psn + 1);
}
} else {
/* No more acks - kill all timers */
rvt_stop_rc_timers(qp);
@ -1794,6 +2202,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
rvt_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
qp->s_retry = qp->s_retry_cnt;
/*
* If the current request is a TID RDMA WRITE request and the
* response is not a TID RDMA WRITE RESP packet, s_last_psn
* can't be advanced.
*/
if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
opcode != TID_OP(WRITE_RESP) &&
cmp_psn(psn, wqe->psn) >= 0)
return 1;
update_last_psn(qp, psn);
return 1;
@ -1803,20 +2220,31 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
goto bail_stop;
if (qp->s_flags & RVT_S_WAIT_RNR)
goto bail_stop;
if (qp->s_rnr_retry == 0) {
rdi = ib_to_rvt(qp->ibqp.device);
if (qp->s_rnr_retry == 0 &&
!((rdi->post_parms[wqe->wr.opcode].flags &
RVT_OPERATION_IGN_RNR_CNT) &&
qp->s_rnr_retry_cnt == 0)) {
status = IB_WC_RNR_RETRY_EXC_ERR;
goto class_b;
}
if (qp->s_rnr_retry_cnt < 7)
if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
qp->s_rnr_retry--;
/* The last valid PSN is the previous PSN. */
update_last_psn(qp, psn - 1);
/*
* The last valid PSN is the previous PSN. For TID RDMA WRITE
* request, s_last_psn should be incremented only when a TID
* RDMA WRITE RESP is received to avoid skipping lost TID RDMA
* WRITE RESP packets.
*/
if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
reset_psn(qp, qp->s_last_psn + 1);
} else {
update_last_psn(qp, psn - 1);
reset_psn(qp, psn);
}
ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
reset_psn(qp, psn);
qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
rvt_stop_rc_timers(qp);
rvt_add_rnr_timer(qp, aeth);
@ -1901,6 +2329,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
while (cmp_psn(psn, wqe->lpsn) > 0) {
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
wqe->wr.opcode == IB_WR_TID_RDMA_WRITE ||
wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
break;
@ -2235,6 +2664,8 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
e->psn = psn;
if (old_req)
goto unlock_done;
if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
qp->s_acked_ack_queue = prev;
qp->s_tail_ack_queue = prev;
break;
}
@ -2248,6 +2679,8 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
*/
if (!e || e->opcode != (u8)opcode || old_req)
goto unlock_done;
if (qp->s_tail_ack_queue == qp->s_acked_ack_queue)
qp->s_acked_ack_queue = prev;
qp->s_tail_ack_queue = prev;
break;
}
@ -2274,6 +2707,8 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
* Resend the RDMA read or atomic op which
* ACKs this duplicate request.
*/
if (qp->s_tail_ack_queue == qp->s_acked_ack_queue)
qp->s_acked_ack_queue = mra;
qp->s_tail_ack_queue = mra;
break;
}
@ -2388,6 +2823,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
void *data = packet->payload;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct hfi1_qp_priv *qpriv = qp->priv;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct ib_other_headers *ohdr = packet->ohdr;
u32 opcode = packet->opcode;
@ -2646,7 +3082,7 @@ send_last:
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
next = 0;
spin_lock_irqsave(&qp->s_lock, flags);
if (unlikely(next == qp->s_tail_ack_queue)) {
if (unlikely(next == qp->s_acked_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
goto nack_inv_unlck;
update_ack_queue(qp, next);
@ -2693,6 +3129,7 @@ send_last:
qp->r_state = opcode;
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
qpriv->r_tid_alloc = qp->r_head_ack_queue;
/* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
@ -2723,7 +3160,7 @@ send_last:
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
next = 0;
spin_lock_irqsave(&qp->s_lock, flags);
if (unlikely(next == qp->s_tail_ack_queue)) {
if (unlikely(next == qp->s_acked_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
goto nack_inv_unlck;
update_ack_queue(qp, next);
@ -2766,6 +3203,7 @@ ack:
qp->r_state = opcode;
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
qpriv->r_tid_alloc = qp->r_head_ack_queue;
/* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;

View File

@ -18,6 +18,7 @@ static inline void update_ack_queue(struct rvt_qp *qp, unsigned int n)
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
next = 0;
qp->s_tail_ack_queue = next;
qp->s_acked_ack_queue = next;
qp->s_ack_state = OP(ACKNOWLEDGE);
}

View File

@ -453,11 +453,13 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
#define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
/**
* schedule_send_yield - test for a yield required for QP send engine
* hfi1_schedule_send_yield - test for a yield required for QP
* send engine
* @timeout: Final time for timeout slice for jiffies
* @qp: a pointer to QP
* @ps: a pointer to a structure with commonly looked-up values for
* the send engine progress
* @tid: true if it is the tid leg
*
* This routine checks if the time slice for the QP has expired
* for RC QPs, if so an additional work entry is queued. At this
@ -465,8 +467,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
* returns true if a yield is required, otherwise, false
* is returned.
*/
static bool schedule_send_yield(struct rvt_qp *qp,
struct hfi1_pkt_state *ps)
bool hfi1_schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
bool tid)
{
ps->pkts_sent = true;
@ -474,8 +476,24 @@ static bool schedule_send_yield(struct rvt_qp *qp,
if (!ps->in_thread ||
workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
spin_lock_irqsave(&qp->s_lock, ps->flags);
qp->s_flags &= ~RVT_S_BUSY;
hfi1_schedule_send(qp);
if (!tid) {
qp->s_flags &= ~RVT_S_BUSY;
hfi1_schedule_send(qp);
} else {
struct hfi1_qp_priv *priv = qp->priv;
if (priv->s_flags &
HFI1_S_TID_BUSY_SET) {
qp->s_flags &= ~RVT_S_BUSY;
priv->s_flags &=
~(HFI1_S_TID_BUSY_SET |
RVT_S_BUSY);
} else {
priv->s_flags &= ~RVT_S_BUSY;
}
hfi1_schedule_tid_send(qp);
}
spin_unlock_irqrestore(&qp->s_lock, ps->flags);
this_cpu_inc(*ps->ppd->dd->send_schedule);
trace_hfi1_rc_expired_time_slice(qp, true);
@ -576,6 +594,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
do {
/* Check for a constructed packet to be sent. */
if (ps.s_txreq) {
if (priv->s_flags & HFI1_S_TID_BUSY_SET)
qp->s_flags |= RVT_S_BUSY;
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
/*
* If the packet cannot be sent now, return and
@ -585,7 +605,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
return;
/* allow other tasks to run */
if (schedule_send_yield(qp, &ps))
if (hfi1_schedule_send_yield(qp, &ps, false))
return;
spin_lock_irqsave(&qp->s_lock, ps.flags);

View File

@ -1747,10 +1747,9 @@ retry:
*/
static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
struct iowait *wait, *nw;
struct iowait *wait, *nw, *twait;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
uint i, n = 0, seq, max_idx = 0;
u8 max_starved_cnt = 0;
uint i, n = 0, seq, tidx = 0;
#ifdef CONFIG_SDMA_VERBOSITY
dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
@ -1775,13 +1774,20 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
continue;
if (n == ARRAY_SIZE(waits))
break;
iowait_init_priority(wait);
num_desc = iowait_get_all_desc(wait);
if (num_desc > avail)
break;
avail -= num_desc;
/* Find the most starved wait memeber */
iowait_starve_find_max(wait, &max_starved_cnt,
n, &max_idx);
/* Find the top-priority wait member */
if (n) {
twait = waits[tidx];
tidx =
iowait_priority_update_top(wait,
twait,
n,
tidx);
}
list_del_init(&wait->list);
waits[n++] = wait;
}
@ -1790,12 +1796,12 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
}
} while (read_seqretry(&sde->waitlock, seq));
/* Schedule the most starved one first */
/* Schedule the top-priority entry first */
if (n)
waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);
waits[tidx]->wakeup(waits[tidx], SDMA_AVAIL_REASON);
for (i = 0; i < n; i++)
if (i != max_idx)
if (i != tidx)
waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}

View File

@ -91,6 +91,7 @@ struct sdma_desc {
#define SDMA_TXREQ_F_URGENT 0x0001
#define SDMA_TXREQ_F_AHG_COPY 0x0002
#define SDMA_TXREQ_F_USE_AHG 0x0004
#define SDMA_TXREQ_F_VIP 0x0010
struct sdma_txreq;
typedef void (*callback_t)(struct sdma_txreq *, int);

File diff suppressed because it is too large Load Diff

View File

@ -25,8 +25,34 @@
* s_flags, there are no collisions.
*
* HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
* HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock
*/
#define HFI1_S_TID_BUSY_SET BIT(0)
/* BIT(1) reserved for RVT_S_BUSY. */
#define HFI1_R_TID_RSC_TIMER BIT(2)
/* BIT(3) reserved for RVT_S_RESP_PENDING. */
/* BIT(4) reserved for RVT_S_ACK_PENDING. */
#define HFI1_S_TID_WAIT_INTERLCK BIT(5)
#define HFI1_R_TID_WAIT_INTERLCK BIT(6)
/* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */
/* BIT(16) reserved for RVT_S_SEND_ONE */
#define HFI1_S_TID_RETRY_TIMER BIT(17)
/* BIT(18) reserved for RVT_S_ECN. */
#define HFI1_R_TID_SW_PSN BIT(19)
/* BIT(26) reserved for HFI1_S_WAIT_HALT */
/* BIT(27) reserved for HFI1_S_WAIT_TID_RESP */
/* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE */
/*
* Unlike regular IB RDMA VERBS, which do not require an entry
* in the s_ack_queue, TID RDMA WRITE requests do because they
* generate responses.
* Therefore, the s_ack_queue needs to be extended by a certain
* amount. The key point is that the queue needs to be extended
* without letting the "user" know so the user doesn't end up
* using these extra entries.
*/
#define HFI1_TID_RDMA_WRITE_CNT 8
struct tid_rdma_params {
struct rcu_head rcu_head;
@ -78,20 +104,25 @@ struct tid_rdma_request {
} e;
struct tid_rdma_flow *flows; /* array of tid flows */
struct rvt_sge_state ss; /* SGE state for TID RDMA requests */
u16 n_flows; /* size of the flow buffer window */
u16 setup_head; /* flow index we are setting up */
u16 clear_tail; /* flow index we are clearing */
u16 flow_idx; /* flow index most recently set up */
u16 acked_tail;
u32 seg_len;
u32 total_len;
u32 r_ack_psn; /* next expected ack PSN */
u32 r_flow_psn; /* IB PSN of next segment start */
u32 r_last_acked; /* IB PSN of last ACK'ed packet */
u32 s_next_psn; /* IB PSN of next segment start for read */
u32 total_segs; /* segments required to complete a request */
u32 cur_seg; /* index of current segment */
u32 comp_seg; /* index of last completed segment */
u32 ack_seg; /* index of last ack'ed segment */
u32 alloc_seg; /* index of next segment to be allocated */
u32 isge; /* index of "current" sge */
u32 ack_pending; /* num acks pending for this request */
@ -158,11 +189,18 @@ struct tid_rdma_flow {
u8 npagesets;
u8 npkts;
u8 pkt;
u8 resync_npkts;
struct kern_tid_node tnode[TID_RDMA_MAX_PAGES];
struct tid_rdma_pageset pagesets[TID_RDMA_MAX_PAGES];
u32 tid_entry[TID_RDMA_MAX_PAGES];
};
/*
 * RNR NAK state values for TID RDMA.
 * TID_RNR_NAK_INIT is explicitly 0; TID_RNR_NAK_SEND and TID_RNR_NAK_SENT
 * take the next two values (1 and 2).
 * NOTE(review): the field holding this state and the events that move it
 * between values are not visible here - confirm against tid_rdma.c.
 */
enum tid_rnr_nak_state {
TID_RNR_NAK_INIT = 0,
TID_RNR_NAK_SEND,
TID_RNR_NAK_SENT,
};
bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data);
bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data);
bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data);
@ -228,9 +266,57 @@ static inline void hfi1_setup_tid_rdma_wqe(struct rvt_qp *qp,
struct rvt_swqe *wqe)
{
if (wqe->priv &&
wqe->wr.opcode == IB_WR_RDMA_READ &&
(wqe->wr.opcode == IB_WR_RDMA_READ ||
wqe->wr.opcode == IB_WR_RDMA_WRITE) &&
wqe->length >= TID_RDMA_MIN_SEGMENT_SIZE)
setup_tid_rdma_wqe(qp, wqe);
}
u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
void hfi1_compute_tid_rdma_flow_wt(void);
void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_other_headers *ohdr, u32 *bth1,
u32 bth2, u32 *len,
struct rvt_sge_state **ss);
void hfi1_del_tid_reap_timer(struct rvt_qp *qp);
void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet);
bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_other_headers *ohdr, u16 iflow,
u32 *bth1, u32 *bth2);
void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet);
void hfi1_add_tid_retry_timer(struct rvt_qp *qp);
void hfi1_del_tid_retry_timer(struct rvt_qp *qp);
u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr, u32 *bth1,
u32 *bth2, u16 fidx);
void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet);
struct hfi1_pkt_state;
int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
void _hfi1_do_tid_send(struct work_struct *work);
bool hfi1_schedule_tid_send(struct rvt_qp *qp);
bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e);
#endif /* HFI1_TID_RDMA_H */

View File

@ -133,6 +133,11 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2)
#define TID_RDMA_KDETH_DATA "kdeth0 0x%x: kver %u sh %u intr %u tidctrl %u tid %x offset %x kdeth1 0x%x: jkey %x"
#define TID_READ_REQ_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_READ_RSP_PRN "verbs_qp 0x%x"
#define TID_WRITE_REQ_PRN "original_qp 0x%x"
#define TID_WRITE_RSP_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_WRITE_DATA_PRN "verbs_qp 0x%x"
#define TID_ACK_PRN "tid_flow_psn 0x%x verbs_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_RESYNC_PRN "verbs_qp 0x%x"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
@ -327,6 +332,45 @@ const char *parse_everbs_hdrs(
parse_syndrome(be32_to_cpu(eh->aeth) >> 24),
be32_to_cpu(eh->aeth) & IB_MSN_MASK);
break;
case OP(TID_RDMA, WRITE_REQ):
trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
TID_WRITE_REQ_PRN,
le32_to_cpu(eh->tid_rdma.w_req.kdeth0),
le32_to_cpu(eh->tid_rdma.w_req.kdeth1),
ib_u64_get(&eh->tid_rdma.w_req.reth.vaddr),
be32_to_cpu(eh->tid_rdma.w_req.reth.rkey),
be32_to_cpu(eh->tid_rdma.w_req.reth.length),
be32_to_cpu(eh->tid_rdma.w_req.verbs_qp));
break;
case OP(TID_RDMA, WRITE_RESP):
trace_seq_printf(p, TID_RDMA_KDETH " " AETH_PRN " "
TID_WRITE_RSP_PRN,
le32_to_cpu(eh->tid_rdma.w_rsp.kdeth0),
le32_to_cpu(eh->tid_rdma.w_rsp.kdeth1),
be32_to_cpu(eh->tid_rdma.w_rsp.aeth) >> 24,
parse_syndrome(/* aeth */
be32_to_cpu(eh->tid_rdma.w_rsp.aeth)
>> 24),
(be32_to_cpu(eh->tid_rdma.w_rsp.aeth) &
IB_MSN_MASK),
be32_to_cpu(eh->tid_rdma.w_rsp.tid_flow_psn),
be32_to_cpu(eh->tid_rdma.w_rsp.tid_flow_qp),
be32_to_cpu(eh->tid_rdma.w_rsp.verbs_qp));
break;
case OP(TID_RDMA, WRITE_DATA_LAST):
case OP(TID_RDMA, WRITE_DATA):
trace_seq_printf(p, TID_RDMA_KDETH_DATA " " TID_WRITE_DATA_PRN,
le32_to_cpu(eh->tid_rdma.w_data.kdeth0),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, KVER),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, SH),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, INTR),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, TIDCTRL),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, TID),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, OFFSET),
le32_to_cpu(eh->tid_rdma.w_data.kdeth1),
KDETH_GET(eh->tid_rdma.w_data.kdeth1, JKEY),
be32_to_cpu(eh->tid_rdma.w_data.verbs_qp));
break;
case OP(TID_RDMA, READ_REQ):
trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
TID_READ_REQ_PRN,
@ -359,6 +403,28 @@ const char *parse_everbs_hdrs(
IB_MSN_MASK),
be32_to_cpu(eh->tid_rdma.r_rsp.verbs_qp));
break;
case OP(TID_RDMA, ACK):
trace_seq_printf(p, TID_RDMA_KDETH " " AETH_PRN " "
TID_ACK_PRN,
le32_to_cpu(eh->tid_rdma.ack.kdeth0),
le32_to_cpu(eh->tid_rdma.ack.kdeth1),
be32_to_cpu(eh->tid_rdma.ack.aeth) >> 24,
parse_syndrome(/* aeth */
be32_to_cpu(eh->tid_rdma.ack.aeth)
>> 24),
(be32_to_cpu(eh->tid_rdma.ack.aeth) &
IB_MSN_MASK),
be32_to_cpu(eh->tid_rdma.ack.tid_flow_psn),
be32_to_cpu(eh->tid_rdma.ack.verbs_psn),
be32_to_cpu(eh->tid_rdma.ack.tid_flow_qp),
be32_to_cpu(eh->tid_rdma.ack.verbs_qp));
break;
case OP(TID_RDMA, RESYNC):
trace_seq_printf(p, TID_RDMA_KDETH " " TID_RESYNC_PRN,
le32_to_cpu(eh->tid_rdma.resync.kdeth0),
le32_to_cpu(eh->tid_rdma.resync.kdeth1),
be32_to_cpu(eh->tid_rdma.resync.verbs_qp));
break;
/* aeth + atomicacketh */
case OP(RC, ATOMIC_ACKNOWLEDGE):
trace_seq_printf(p, AETH_PRN " " ATOMICACKETH_PRN,

View File

@ -79,8 +79,14 @@ __print_symbolic(opcode, \
ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
ib_opcode_name(RC_COMPARE_SWAP), \
ib_opcode_name(RC_FETCH_ADD), \
ib_opcode_name(TID_RDMA_WRITE_REQ), \
ib_opcode_name(TID_RDMA_WRITE_RESP), \
ib_opcode_name(TID_RDMA_WRITE_DATA), \
ib_opcode_name(TID_RDMA_WRITE_DATA_LAST), \
ib_opcode_name(TID_RDMA_READ_REQ), \
ib_opcode_name(TID_RDMA_READ_RESP), \
ib_opcode_name(TID_RDMA_RESYNC), \
ib_opcode_name(TID_RDMA_ACK), \
ib_opcode_name(UC_SEND_FIRST), \
ib_opcode_name(UC_SEND_MIDDLE), \
ib_opcode_name(UC_SEND_LAST), \

View File

@ -40,7 +40,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent);
#define RSP_INFO_PRN "[%s] qpn 0x%x state 0x%x s_state 0x%x psn 0x%x " \
"r_psn 0x%x r_state 0x%x r_flags 0x%x " \
"r_head_ack_queue %u s_tail_ack_queue %u " \
"s_ack_state 0x%x " \
"s_acked_ack_queue %u s_ack_state 0x%x " \
"s_nak_state 0x%x s_flags 0x%x ps_flags 0x%x " \
"iow_flags 0x%lx"
@ -52,20 +52,37 @@ u16 hfi1_trace_get_tid_idx(u32 ent);
#define TID_READ_SENDER_PRN "[%s] qpn 0x%x newreq %u tid_r_reqs %u " \
"tid_r_comp %u pending_tid_r_segs %u " \
"s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx " \
"hw_flow_index %u generation 0x%x " \
"s_state 0x%x hw_flow_index %u generation 0x%x " \
"fpsn 0x%x flow_flags 0x%x"
#define TID_REQ_PRN "[%s] qpn 0x%x newreq %u opcode 0x%x psn 0x%x lpsn 0x%x " \
"cur_seg %u comp_seg %u ack_seg %u " \
"cur_seg %u comp_seg %u ack_seg %u alloc_seg %u " \
"total_segs %u setup_head %u clear_tail %u flow_idx %u " \
"state %u r_flow_psn 0x%x " \
"s_next_psn 0x%x"
"acked_tail %u state %u r_ack_psn 0x%x r_flow_psn 0x%x " \
"r_last_ackd 0x%x s_next_psn 0x%x"
#define RCV_ERR_PRN "[%s] qpn 0x%x s_flags 0x%x state 0x%x " \
"s_tail_ack_queue %u " \
"s_acked_ack_queue %u s_tail_ack_queue %u " \
"r_head_ack_queue %u opcode 0x%x psn 0x%x r_psn 0x%x " \
" diff %d"
/*
 * Trace print format for the TID RDMA WRITE responder fields named in
 * the string. The literal is assembled from adjacent fragments, so every
 * fragment that is followed by another must end in a space; otherwise
 * the last value of one fragment runs into the first label of the next
 * (previously "resync %s" was glued to "r_next_psn_kdeth").
 */
#define TID_WRITE_RSPDR_PRN "[%s] qpn 0x%x r_tid_head %u r_tid_tail %u " \
			    "r_tid_ack %u r_tid_alloc %u alloc_w_segs %u " \
			    "pending_tid_w_segs %u sync_pt %s " \
			    "ps_nak_psn 0x%x ps_nak_state 0x%x " \
			    "prnr_nak_state 0x%x hw_flow_index %u generation " \
			    "0x%x fpsn 0x%x flow_flags 0x%x resync %s " \
			    "r_next_psn_kdeth 0x%x"
/*
 * Trace print format for the TID RDMA WRITE sender fields named in the
 * string; fragments are grouped so related fields share a line.
 */
#define TID_WRITE_SENDER_PRN \
	"[%s] qpn 0x%x newreq %u " \
	"s_tid_cur %u s_tid_tail %u s_tid_head %u " \
	"pending_tid_w_resp %u n_requests %u n_tid_requests %u " \
	"s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx " \
	"s_state 0x%x s_retry %u"
/* Trace print format for a KDETH eflags TID error on a QP. */
#define KDETH_EFLAGS_ERR_PRN \
	"[%s] qpn 0x%x TID ERR: " \
	"RcvType 0x%x RcvTypeError 0x%x PSN 0x%x"
DECLARE_EVENT_CLASS(/* class */
hfi1_exp_tid_reg_unreg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
@ -382,6 +399,18 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, msg, more)
);
DEFINE_EVENT(/* event */
hfi1_msg_template, hfi1_msg_tid_timeout,
TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
TP_ARGS(qp, msg, more)
);
DEFINE_EVENT(/* event */
hfi1_msg_template, hfi1_msg_tid_retry_timeout,
TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
TP_ARGS(qp, msg, more)
);
DECLARE_EVENT_CLASS(/* tid_flow_page */
hfi1_tid_flow_page_template,
TP_PROTO(struct rvt_qp *qp, struct tid_rdma_flow *flow, u32 index,
@ -562,6 +591,42 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, index, flow)
);
DEFINE_EVENT(/* event */
hfi1_tid_flow_template, hfi1_tid_flow_build_write_resp,
TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
TP_ARGS(qp, index, flow)
);
DEFINE_EVENT(/* event */
hfi1_tid_flow_template, hfi1_tid_flow_rcv_write_resp,
TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
TP_ARGS(qp, index, flow)
);
DEFINE_EVENT(/* event */
hfi1_tid_flow_template, hfi1_tid_flow_build_write_data,
TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
TP_ARGS(qp, index, flow)
);
DEFINE_EVENT(/* event */
hfi1_tid_flow_template, hfi1_tid_flow_rcv_tid_ack,
TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
TP_ARGS(qp, index, flow)
);
DEFINE_EVENT(/* event */
hfi1_tid_flow_template, hfi1_tid_flow_rcv_resync,
TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
TP_ARGS(qp, index, flow)
);
DEFINE_EVENT(/* event */
hfi1_tid_flow_template, hfi1_tid_flow_handle_kdeth_eflags,
TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
TP_ARGS(qp, index, flow)
);
DECLARE_EVENT_CLASS(/* tid_node */
hfi1_tid_node_template,
TP_PROTO(struct rvt_qp *qp, const char *msg, u32 index, u32 base,
@ -656,6 +721,18 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, index, ent)
);
DEFINE_EVENT(/* event */
hfi1_tid_entry_template, hfi1_tid_entry_rcv_write_resp,
TP_PROTO(struct rvt_qp *qp, int index, u32 entry),
TP_ARGS(qp, index, entry)
);
DEFINE_EVENT(/* event */
hfi1_tid_entry_template, hfi1_tid_entry_build_write_data,
TP_PROTO(struct rvt_qp *qp, int index, u32 entry),
TP_ARGS(qp, index, entry)
);
DECLARE_EVENT_CLASS(/* rsp_info */
hfi1_responder_info_template,
TP_PROTO(struct rvt_qp *qp, u32 psn),
@ -671,6 +748,7 @@ DECLARE_EVENT_CLASS(/* rsp_info */
__field(u8, r_flags)
__field(u8, r_head_ack_queue)
__field(u8, s_tail_ack_queue)
__field(u8, s_acked_ack_queue)
__field(u8, s_ack_state)
__field(u8, s_nak_state)
__field(u8, r_nak_state)
@ -691,6 +769,7 @@ DECLARE_EVENT_CLASS(/* rsp_info */
__entry->r_flags = qp->r_flags;
__entry->r_head_ack_queue = qp->r_head_ack_queue;
__entry->s_tail_ack_queue = qp->s_tail_ack_queue;
__entry->s_acked_ack_queue = qp->s_acked_ack_queue;
__entry->s_ack_state = qp->s_ack_state;
__entry->s_nak_state = qp->s_nak_state;
__entry->s_flags = qp->s_flags;
@ -709,6 +788,7 @@ DECLARE_EVENT_CLASS(/* rsp_info */
__entry->r_flags,
__entry->r_head_ack_queue,
__entry->s_tail_ack_queue,
__entry->s_acked_ack_queue,
__entry->s_ack_state,
__entry->s_nak_state,
__entry->s_flags,
@ -735,6 +815,42 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, psn)
);
DEFINE_EVENT(/* event */
hfi1_responder_info_template, hfi1_rsp_tid_write_alloc_res,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DEFINE_EVENT(/* event */
hfi1_responder_info_template, hfi1_rsp_rcv_tid_write_req,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DEFINE_EVENT(/* event */
hfi1_responder_info_template, hfi1_rsp_build_tid_write_resp,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DEFINE_EVENT(/* event */
hfi1_responder_info_template, hfi1_rsp_rcv_tid_write_data,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DEFINE_EVENT(/* event */
hfi1_responder_info_template, hfi1_rsp_make_tid_ack,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DEFINE_EVENT(/* event */
hfi1_responder_info_template, hfi1_rsp_handle_kdeth_eflags,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DECLARE_EVENT_CLASS(/* sender_info */
hfi1_sender_info_template,
TP_PROTO(struct rvt_qp *qp),
@ -827,6 +943,18 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp)
);
DEFINE_EVENT(/* event */
hfi1_sender_info_template, hfi1_sender_rcv_tid_ack,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DEFINE_EVENT(/* event */
hfi1_sender_info_template, hfi1_sender_make_tid_pkt,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DECLARE_EVENT_CLASS(/* tid_read_sender */
hfi1_tid_read_sender_template,
TP_PROTO(struct rvt_qp *qp, char newreq),
@ -841,6 +969,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
__field(u32, s_flags)
__field(u32, ps_flags)
__field(unsigned long, iow_flags)
__field(u8, s_state)
__field(u32, hw_flow_index)
__field(u32, generation)
__field(u32, fpsn)
@ -858,6 +987,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
__entry->s_flags = qp->s_flags;
__entry->ps_flags = priv->s_flags;
__entry->iow_flags = priv->s_iowait.flags;
__entry->s_state = priv->s_state;
__entry->hw_flow_index = priv->flow_state.index;
__entry->generation = priv->flow_state.generation;
__entry->fpsn = priv->flow_state.psn;
@ -874,6 +1004,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
__entry->s_flags,
__entry->ps_flags,
__entry->iow_flags,
__entry->s_state,
__entry->hw_flow_index,
__entry->generation,
__entry->fpsn,
@ -902,12 +1033,16 @@ DECLARE_EVENT_CLASS(/* tid_rdma_request */
__field(u32, cur_seg)
__field(u32, comp_seg)
__field(u32, ack_seg)
__field(u32, alloc_seg)
__field(u32, total_segs)
__field(u16, setup_head)
__field(u16, clear_tail)
__field(u16, flow_idx)
__field(u16, acked_tail)
__field(u32, state)
__field(u32, r_ack_psn)
__field(u32, r_flow_psn)
__field(u32, r_last_acked)
__field(u32, s_next_psn)
),
TP_fast_assign(/* assign */
@ -920,12 +1055,16 @@ DECLARE_EVENT_CLASS(/* tid_rdma_request */
__entry->cur_seg = req->cur_seg;
__entry->comp_seg = req->comp_seg;
__entry->ack_seg = req->ack_seg;
__entry->alloc_seg = req->alloc_seg;
__entry->total_segs = req->total_segs;
__entry->setup_head = req->setup_head;
__entry->clear_tail = req->clear_tail;
__entry->flow_idx = req->flow_idx;
__entry->acked_tail = req->acked_tail;
__entry->state = req->state;
__entry->r_ack_psn = req->r_ack_psn;
__entry->r_flow_psn = req->r_flow_psn;
__entry->r_last_acked = req->r_last_acked;
__entry->s_next_psn = req->s_next_psn;
),
TP_printk(/* print */
@ -939,12 +1078,16 @@ DECLARE_EVENT_CLASS(/* tid_rdma_request */
__entry->cur_seg,
__entry->comp_seg,
__entry->ack_seg,
__entry->alloc_seg,
__entry->total_segs,
__entry->setup_head,
__entry->clear_tail,
__entry->flow_idx,
__entry->acked_tail,
__entry->state,
__entry->r_ack_psn,
__entry->r_flow_psn,
__entry->r_last_acked,
__entry->s_next_psn
)
);
@ -998,6 +1141,97 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_write_alloc_res,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
/*
 * Trace events built on hfi1_tid_rdma_request_template.
 *
 * Every event below has the same prototype and passes its arguments
 * through unchanged: the QP, a "newreq" character flag, the opcode, the
 * psn/lpsn pair and the TID RDMA request being traced.  Only the event
 * name differs.  The template itself (recorded fields and print format)
 * is defined earlier in this header.
 *
 * NOTE(review): the event-name suffixes presumably identify the call
 * site in the TID RDMA WRITE code path -- confirm against the callers.
 */
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_write_req,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_build_write_resp,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_write_resp,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_write_data,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_tid_ack,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_tid_retry_timeout,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_resync,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_make_tid_pkt,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_make_tid_ack,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_handle_kdeth_eflags,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_make_rc_ack_write,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_make_req_write,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
DECLARE_EVENT_CLASS(/* rc_rcv_err */
hfi1_rc_rcv_err_template,
TP_PROTO(struct rvt_qp *qp, u32 opcode, u32 psn, int diff),
@ -1007,6 +1241,7 @@ DECLARE_EVENT_CLASS(/* rc_rcv_err */
__field(u32, qpn)
__field(u32, s_flags)
__field(u8, state)
__field(u8, s_acked_ack_queue)
__field(u8, s_tail_ack_queue)
__field(u8, r_head_ack_queue)
__field(u32, opcode)
@ -1019,6 +1254,7 @@ DECLARE_EVENT_CLASS(/* rc_rcv_err */
__entry->qpn = qp->ibqp.qp_num;
__entry->s_flags = qp->s_flags;
__entry->state = qp->state;
__entry->s_acked_ack_queue = qp->s_acked_ack_queue;
__entry->s_tail_ack_queue = qp->s_tail_ack_queue;
__entry->r_head_ack_queue = qp->r_head_ack_queue;
__entry->opcode = opcode;
@ -1032,6 +1268,7 @@ DECLARE_EVENT_CLASS(/* rc_rcv_err */
__entry->qpn,
__entry->s_flags,
__entry->state,
__entry->s_acked_ack_queue,
__entry->s_tail_ack_queue,
__entry->r_head_ack_queue,
__entry->opcode,
@ -1081,6 +1318,289 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, index, sge)
);
/*
 * Event class that snapshots the responder-side TID RDMA WRITE state of
 * a QP.
 *
 * The only argument is the QP.  Everything except the QP number is read
 * from the driver-private area (qp->priv, a struct hfi1_qp_priv): the
 * r_tid_* request indices, the segment counters, the NAK state and the
 * current hardware flow state.  Output is formatted with
 * TID_WRITE_RSPDR_PRN, which is defined elsewhere in this header.
 */
DECLARE_EVENT_CLASS(/* tid_write_rsp */
hfi1_tid_write_rsp_template,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(u32, r_tid_head)
__field(u32, r_tid_tail)
__field(u32, r_tid_ack)
__field(u32, r_tid_alloc)
__field(u32, alloc_w_segs)
__field(u32, pending_tid_w_segs)
/* sync_pt and resync are recorded as bool and printed as "yes"/"no" */
__field(bool, sync_pt)
__field(u32, ps_nak_psn)
__field(u8, ps_nak_state)
__field(u8, prnr_nak_state)
__field(u32, hw_flow_index)
__field(u32, generation)
__field(u32, fpsn)
__field(u32, flow_flags)
__field(bool, resync)
__field(u32, r_next_psn_kdeth)
),
TP_fast_assign(/* assign */
struct hfi1_qp_priv *priv = qp->priv;
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->r_tid_head = priv->r_tid_head;
__entry->r_tid_tail = priv->r_tid_tail;
__entry->r_tid_ack = priv->r_tid_ack;
__entry->r_tid_alloc = priv->r_tid_alloc;
__entry->alloc_w_segs = priv->alloc_w_segs;
__entry->pending_tid_w_segs = priv->pending_tid_w_segs;
__entry->sync_pt = priv->sync_pt;
/* the ps_/p prefixed entries hold the priv copies of the NAK fields */
__entry->ps_nak_psn = priv->s_nak_psn;
__entry->ps_nak_state = priv->s_nak_state;
__entry->prnr_nak_state = priv->rnr_nak_state;
/* hardware flow state: index, generation, psn and flags */
__entry->hw_flow_index = priv->flow_state.index;
__entry->generation = priv->flow_state.generation;
__entry->fpsn = priv->flow_state.psn;
__entry->flow_flags = priv->flow_state.flags;
__entry->resync = priv->resync;
__entry->r_next_psn_kdeth = priv->r_next_psn_kdeth;
),
TP_printk(/* print */
TID_WRITE_RSPDR_PRN,
__get_str(dev),
__entry->qpn,
__entry->r_tid_head,
__entry->r_tid_tail,
__entry->r_tid_ack,
__entry->r_tid_alloc,
__entry->alloc_w_segs,
__entry->pending_tid_w_segs,
__entry->sync_pt ? "yes" : "no",
__entry->ps_nak_psn,
__entry->ps_nak_state,
__entry->prnr_nak_state,
__entry->hw_flow_index,
__entry->generation,
__entry->fpsn,
__entry->flow_flags,
__entry->resync ? "yes" : "no",
__entry->r_next_psn_kdeth
)
);
/*
 * Trace events built on hfi1_tid_write_rsp_template.  Each takes only
 * the QP and emits the responder-side state snapshot defined by that
 * class; the events differ in name only.
 *
 * NOTE(review): the name suffixes presumably identify the call site --
 * confirm against the callers.
 */
DEFINE_EVENT(/* event */
hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_alloc_res,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DEFINE_EVENT(/* event */
hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_rcv_req,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DEFINE_EVENT(/* event */
hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_build_resp,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DEFINE_EVENT(/* event */
hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_rcv_data,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DEFINE_EVENT(/* event */
hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_rcv_resync,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DEFINE_EVENT(/* event */
hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_make_tid_ack,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DEFINE_EVENT(/* event */
hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_handle_kdeth_eflags,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
DEFINE_EVENT(/* event */
hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_make_rc_ack,
TP_PROTO(struct rvt_qp *qp),
TP_ARGS(qp)
);
/*
 * Event class that snapshots the sender-side TID RDMA WRITE state of a
 * QP.
 *
 * Arguments are the QP and a caller-supplied "newreq" character, which
 * is recorded as-is.  The remaining fields are read at trace time: the
 * s_tid_* indices, pending_tid_w_resp, the two atomic request counters,
 * the iowait flags and s_state/s_retry come from the driver-private
 * area (qp->priv), while s_flags is the QP's own flag word and ps_flags
 * is the private one.  Output is formatted with TID_WRITE_SENDER_PRN,
 * which is defined elsewhere in this header.
 */
DECLARE_EVENT_CLASS(/* tid_write_sender */
hfi1_tid_write_sender_template,
TP_PROTO(struct rvt_qp *qp, char newreq),
TP_ARGS(qp, newreq),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(char, newreq)
__field(u32, s_tid_cur)
__field(u32, s_tid_tail)
__field(u32, s_tid_head)
__field(u32, pending_tid_w_resp)
__field(u32, n_requests)
__field(u32, n_tid_requests)
__field(u32, s_flags)
__field(u32, ps_flags)
__field(unsigned long, iow_flags)
__field(u8, s_state)
__field(u8, s_retry)
),
TP_fast_assign(/* assign */
struct hfi1_qp_priv *priv = qp->priv;
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->newreq = newreq;
__entry->s_tid_cur = priv->s_tid_cur;
__entry->s_tid_tail = priv->s_tid_tail;
__entry->s_tid_head = priv->s_tid_head;
__entry->pending_tid_w_resp = priv->pending_tid_w_resp;
/* the request counters are atomic_t; sample them with atomic_read() */
__entry->n_requests = atomic_read(&priv->n_requests);
__entry->n_tid_requests = atomic_read(&priv->n_tid_requests);
/* s_flags is the rvt_qp flag word, ps_flags the hfi1 private one */
__entry->s_flags = qp->s_flags;
__entry->ps_flags = priv->s_flags;
__entry->iow_flags = priv->s_iowait.flags;
__entry->s_state = priv->s_state;
__entry->s_retry = priv->s_retry;
),
TP_printk(/* print */
TID_WRITE_SENDER_PRN,
__get_str(dev),
__entry->qpn,
__entry->newreq,
__entry->s_tid_cur,
__entry->s_tid_tail,
__entry->s_tid_head,
__entry->pending_tid_w_resp,
__entry->n_requests,
__entry->n_tid_requests,
__entry->s_flags,
__entry->ps_flags,
__entry->iow_flags,
__entry->s_state,
__entry->s_retry
)
);
/*
 * Trace events built on hfi1_tid_write_sender_template.  Each takes the
 * QP plus the "newreq" character and emits the sender-side state
 * snapshot defined by that class; the events differ in name only.
 *
 * NOTE(review): the name suffixes presumably identify the call site --
 * confirm against the callers.
 */
DEFINE_EVENT(/* event */
hfi1_tid_write_sender_template, hfi1_tid_write_sender_rcv_resp,
TP_PROTO(struct rvt_qp *qp, char newreq),
TP_ARGS(qp, newreq)
);
DEFINE_EVENT(/* event */
hfi1_tid_write_sender_template, hfi1_tid_write_sender_rcv_tid_ack,
TP_PROTO(struct rvt_qp *qp, char newreq),
TP_ARGS(qp, newreq)
);
DEFINE_EVENT(/* event */
hfi1_tid_write_sender_template, hfi1_tid_write_sender_retry_timeout,
TP_PROTO(struct rvt_qp *qp, char newreq),
TP_ARGS(qp, newreq)
);
DEFINE_EVENT(/* event */
hfi1_tid_write_sender_template, hfi1_tid_write_sender_make_tid_pkt,
TP_PROTO(struct rvt_qp *qp, char newreq),
TP_ARGS(qp, newreq)
);
DEFINE_EVENT(/* event */
hfi1_tid_write_sender_template, hfi1_tid_write_sender_make_req,
TP_PROTO(struct rvt_qp *qp, char newreq),
TP_ARGS(qp, newreq)
);
DEFINE_EVENT(/* event */
hfi1_tid_write_sender_template, hfi1_tid_write_sender_restart_rc,
TP_PROTO(struct rvt_qp *qp, char newreq),
TP_ARGS(qp, newreq)
);
/*
 * Event class for tracing TID RDMA ACK handling.
 *
 * Records the QP number together with the caller-supplied aeth, psn,
 * req_psn and resync_psn values.  Nothing is read from the QP other
 * than its number and owning device.  All values are printed in hex,
 * prefixed by the device name.
 */
DECLARE_EVENT_CLASS(/* tid_ack */
	hfi1_tid_ack_template,
	TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
		 u32 req_psn, u32 resync_psn),
	TP_ARGS(qp, aeth, psn, req_psn, resync_psn),
	TP_STRUCT__entry(/* entry */
		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
		__field(u32, qpn)
		__field(u32, aeth)
		__field(u32, psn)
		__field(u32, req_psn)
		__field(u32, resync_psn)
	),
	TP_fast_assign(/* assign */
		/*
		 * Terminate DD_DEV_ASSIGN() explicitly, as every other event
		 * class in this header does, so the statement boundary does
		 * not depend on how the macro happens to expand.
		 */
		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
		__entry->qpn = qp->ibqp.qp_num;
		__entry->aeth = aeth;
		__entry->psn = psn;
		__entry->req_psn = req_psn;
		__entry->resync_psn = resync_psn;
	),
	TP_printk(/* print */
		"[%s] qpn 0x%x aeth 0x%x psn 0x%x req_psn 0x%x resync_psn 0x%x",
		__get_str(dev),
		__entry->qpn,
		__entry->aeth,
		__entry->psn,
		__entry->req_psn,
		__entry->resync_psn
	)
);
/*
 * The single event built on hfi1_tid_ack_template; it forwards the QP
 * and the aeth/psn/req_psn/resync_psn values to the class unchanged.
 */
DEFINE_EVENT(/* rcv_tid_ack */
hfi1_tid_ack_template, hfi1_rcv_tid_ack,
TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
u32 req_psn, u32 resync_psn),
TP_ARGS(qp, aeth, psn, req_psn, resync_psn)
);
/*
 * Event class for KDETH eflags error tracing.
 *
 * Records the QP number and the caller-supplied rcv_type, rte and psn
 * values; nothing else is read from the QP apart from its owning
 * device.  Output is formatted with KDETH_EFLAGS_ERR_PRN, which is
 * defined elsewhere in this header.
 *
 * NOTE(review): rcv_type and rte presumably carry the receive type and
 * receive-type-error code of the offending packet -- confirm against
 * the caller.
 */
DECLARE_EVENT_CLASS(/* kdeth_eflags_error */
hfi1_kdeth_eflags_error_template,
TP_PROTO(struct rvt_qp *qp, u8 rcv_type, u8 rte, u32 psn),
TP_ARGS(qp, rcv_type, rte, psn),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(u8, rcv_type)
__field(u8, rte)
__field(u32, psn)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->rcv_type = rcv_type;
__entry->rte = rte;
__entry->psn = psn;
),
TP_printk(/* print */
KDETH_EFLAGS_ERR_PRN,
__get_str(dev),
__entry->qpn,
__entry->rcv_type,
__entry->rte,
__entry->psn
)
);
/*
 * Event built on hfi1_kdeth_eflags_error_template; it forwards the QP,
 * rcv_type, rte and psn to the class unchanged.
 */
DEFINE_EVENT(/* event */
hfi1_kdeth_eflags_error_template, hfi1_eflags_err_write,
TP_PROTO(struct rvt_qp *qp, u8 rcv_type, u8 rte, u32 psn),
TP_ARGS(qp, rcv_type, rte, psn)
);
#endif /* __HFI1_TRACE_TID_H */
#undef TRACE_INCLUDE_PATH

View File

@ -846,6 +846,12 @@ DEFINE_EVENT(
TP_ARGS(qp, flag)
);
/*
 * Event built on hfi1_do_send_template (defined elsewhere in this
 * header); it takes the QP and a boolean flag and forwards both to the
 * class unchanged.
 */
DEFINE_EVENT(/* event */
hfi1_do_send_template, hfi1_rc_do_tid_send,
TP_PROTO(struct rvt_qp *qp, bool flag),
TP_ARGS(qp, flag)
);
DEFINE_EVENT(
hfi1_do_send_template, hfi1_rc_expired_time_slice,
TP_PROTO(struct rvt_qp *qp, bool flag),

View File

@ -144,8 +144,10 @@ static int defer_packet_queue(
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
write_seqlock(&sde->waitlock);
if (list_empty(&pq->busy.list))
if (list_empty(&pq->busy.list)) {
iowait_get_priority(&pq->busy);
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
}
write_sequnlock(&sde->waitlock);
return -EBUSY;
eagain:
@ -191,7 +193,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq->mm = fd->mm;
iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
activate_packet_queue, NULL);
activate_packet_queue, NULL, NULL);
pq->reqidx = 0;
pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
@ -1126,7 +1128,8 @@ static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
0xffffffull),
psn = val & mask;
if (expct)
psn = (psn & ~BTH_SEQ_MASK) | ((psn + frags) & BTH_SEQ_MASK);
psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK);
else
psn = psn + frags;
return psn & mask;

View File

@ -161,6 +161,7 @@ MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the
*/
const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_TID_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
[IB_WR_SEND] = IB_WC_SEND,
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
@ -203,6 +204,12 @@ const u8 hdr_len_by_opcode[256] = {
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4,
[IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_REQ] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_RESP] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_DATA] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_ACK] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_RESYNC] = 12 + 8 + 36,
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
@ -248,8 +255,14 @@ static const opcode_handler opcode_handler_tbl[256] = {
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv,
/* TID RDMA has separate handlers for different opcodes.*/
[IB_OPCODE_TID_RDMA_WRITE_REQ] = &hfi1_rc_rcv_tid_rdma_write_req,
[IB_OPCODE_TID_RDMA_WRITE_RESP] = &hfi1_rc_rcv_tid_rdma_write_resp,
[IB_OPCODE_TID_RDMA_WRITE_DATA] = &hfi1_rc_rcv_tid_rdma_write_data,
[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = &hfi1_rc_rcv_tid_rdma_write_data,
[IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req,
[IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp,
[IB_OPCODE_TID_RDMA_RESYNC] = &hfi1_rc_rcv_tid_rdma_resync,
[IB_OPCODE_TID_RDMA_ACK] = &hfi1_rc_rcv_tid_rdma_ack,
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
@ -932,6 +945,7 @@ static int pio_wait(struct rvt_qp *qp,
dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
iowait_get_priority(&priv->s_iowait);
iowait_queue(ps->pkts_sent, &priv->s_iowait,
&sc->piowait);
priv->s_iowait.lock = &sc->waitlock;
@ -1332,7 +1346,9 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
rdi->dparms.props.max_mr_size = U64_MAX;
rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
rdi->dparms.props.max_qp = hfi1_max_qps;
rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
rdi->dparms.props.max_qp_wr =
(hfi1_max_qp_wrs >= HFI1_QP_WQE_INVALID ?
HFI1_QP_WQE_INVALID - 1 : hfi1_max_qp_wrs);
rdi->dparms.props.max_send_sge = hfi1_max_sges;
rdi->dparms.props.max_recv_sge = hfi1_max_sges;
rdi->dparms.props.max_sge_rd = hfi1_max_sges;
@ -1888,7 +1904,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
dd->verbs_dev.rdi.dparms.reserved_operations = 1;
dd->verbs_dev.rdi.dparms.extra_rdma_atomic = 1;
dd->verbs_dev.rdi.dparms.extra_rdma_atomic = HFI1_TID_RDMA_WRITE_CNT;
/* post send table */
dd->verbs_dev.rdi.post_parms = hfi1_post_parms;

View File

@ -163,16 +163,39 @@ struct hfi1_qp_priv {
u32 tid_enqueue; /* saved when tid waited */
u8 s_sc; /* SC[0..4] for next packet */
struct iowait s_iowait;
struct timer_list s_tid_timer; /* for timing tid wait */
struct timer_list s_tid_retry_timer; /* for timing tid ack */
struct list_head tid_wait; /* for queueing tid space */
struct hfi1_opfn_data opfn;
struct tid_flow_state flow_state;
struct tid_rdma_qp_params tid_rdma;
struct rvt_qp *owner;
u8 hdr_type; /* 9B or 16B */
struct rvt_sge_state tid_ss; /* SGE state pointer for 2nd leg */
atomic_t n_requests; /* # of TID RDMA requests in the */
/* queue */
atomic_t n_tid_requests; /* # of sent TID RDMA requests */
unsigned long tid_timer_timeout_jiffies;
unsigned long tid_retry_timeout_jiffies;
/* variables for the TID RDMA SE state machine */
u8 s_state;
u8 s_retry;
u8 rnr_nak_state; /* RNR NAK state */
u8 s_nak_state;
u32 s_nak_psn;
u32 s_flags;
u32 s_tid_cur;
u32 s_tid_head;
u32 s_tid_tail;
u32 r_tid_head; /* Most recently added TID RDMA request */
u32 r_tid_tail; /* the last completed TID RDMA request */
u32 r_tid_ack; /* the TID RDMA request to be ACK'ed */
u32 r_tid_alloc; /* Request for which we are allocating resources */
u32 pending_tid_w_segs; /* Num of pending tid write segments */
u32 pending_tid_w_resp; /* Num of pending tid write responses */
u32 alloc_w_segs; /* Number of segments for which write */
/* resources have been allocated for this QP */
/* For TID RDMA READ */
u32 tid_r_reqs; /* Num of tid reads requested */
@ -180,14 +203,23 @@ struct hfi1_qp_priv {
u32 pending_tid_r_segs; /* Num of pending tid read segments */
u16 pkts_ps; /* packets per segment */
u8 timeout_shift; /* account for number of packets per segment */
u32 r_next_psn_kdeth;
u32 r_next_psn_kdeth_save;
u32 s_resync_psn;
u8 sync_pt; /* Set when QP reaches sync point */
u8 resync;
};
/*
 * All-ones u32 sentinel.  hfi1_fill_device_attr() caps the advertised
 * max_qp_wr to stay below this value.
 * NOTE(review): presumably used to mark a WQE index as "none" -- confirm
 * against the users of the s_tid_* indices.
 */
#define HFI1_QP_WQE_INVALID ((u32)-1)
/*
 * hfi1 private data pairing a TID RDMA request with an SGE state.
 * NOTE(review): presumably attached to each send WQE -- confirm where
 * it is allocated.
 */
struct hfi1_swqe_priv {
struct tid_rdma_request tid_req;
struct rvt_sge_state ss; /* Used for TID RDMA READ Request */
};
/*
 * hfi1 private data pairing an SGE state with a TID RDMA request.
 * NOTE(review): presumably attached to each ack queue entry -- confirm
 * where it is allocated.
 */
struct hfi1_ack_priv {
struct rvt_sge_state ss; /* used for TID WRITE RESP */
struct tid_rdma_request tid_req;
};
@ -412,6 +444,9 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth1, u32 bth2, int middle,
struct hfi1_pkt_state *ps);
bool hfi1_schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
bool tid);
void _hfi1_do_send(struct work_struct *work);
void hfi1_do_send_from_rvt(struct rvt_qp *qp);

View File

@ -94,6 +94,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
tx->txreq.num_desc = 0;
/* Set the header type */
tx->phdr.hdr.hdr_type = priv->hdr_type;
tx->txreq.flags = 0;
return tx;
}

View File

@ -240,8 +240,10 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
}
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
if (list_empty(&vnic_sdma->wait.list))
if (list_empty(&vnic_sdma->wait.list)) {
iowait_get_priority(wait->iow);
iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
}
write_sequnlock(&sde->waitlock);
return -EBUSY;
}
@ -281,7 +283,7 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
hfi1_vnic_sdma_sleep,
hfi1_vnic_sdma_wakeup, NULL);
hfi1_vnic_sdma_wakeup, NULL, NULL);
vnic_sdma->sde = &vinfo->dd->per_sdma[i];
vnic_sdma->dd = vinfo->dd;
vnic_sdma->vinfo = vinfo;

View File

@ -854,6 +854,7 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
qp->s_mig_state = IB_MIG_MIGRATED;
qp->r_head_ack_queue = 0;
qp->s_tail_ack_queue = 0;
qp->s_acked_ack_queue = 0;
qp->s_num_rd_atomic = 0;
if (qp->r_rq.wq) {
qp->r_rq.wq->head = 0;

View File

@ -123,6 +123,11 @@ union ib_ehdrs {
union {
struct tid_rdma_read_req r_req;
struct tid_rdma_read_resp r_rsp;
struct tid_rdma_write_req w_req;
struct tid_rdma_write_resp w_rsp;
struct tid_rdma_write_data w_data;
struct tid_rdma_resync resync;
struct tid_rdma_ack ack;
} tid_rdma;
} __packed;

View File

@ -246,6 +246,7 @@ struct rvt_ack_entry {
#define RVT_OPERATION_ATOMIC_SGE 0x00000004
#define RVT_OPERATION_LOCAL 0x00000008
#define RVT_OPERATION_USE_RESERVE 0x00000010
#define RVT_OPERATION_IGN_RNR_CNT 0x00000020
#define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1)
@ -375,6 +376,7 @@ struct rvt_qp {
u8 s_rnr_retry; /* requester RNR retry counter */
u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
u8 s_tail_ack_queue; /* index into s_ack_queue[] */
u8 s_acked_ack_queue; /* index into s_ack_queue[] */
struct rvt_sge_state s_ack_rdma_sge;
struct timer_list s_timer;

View File

@ -27,16 +27,71 @@ struct tid_rdma_read_resp {
__be32 verbs_qp;
};
/*
 * Extended-header layout of a TID RDMA WRITE REQ packet (the w_req
 * member of the tid_rdma union in union ib_ehdrs).  This is a wire
 * format: field order and widths must not change.
 */
struct tid_rdma_write_req {
__le32 kdeth0; /* KDETH words are little-endian */
__le32 kdeth1;
struct ib_reth reth; /* struct ib_reth is defined elsewhere */
__be32 reserved[2];
__be32 verbs_qp;
};
/*
 * Extended-header layout of a TID RDMA WRITE RESP packet (the w_rsp
 * member of the tid_rdma union in union ib_ehdrs): nine 32-bit words.
 * This is a wire format: field order and widths must not change.
 */
struct tid_rdma_write_resp {
__le32 kdeth0; /* KDETH words are little-endian */
__le32 kdeth1;
__be32 aeth;
__be32 reserved[3];
__be32 tid_flow_psn;
__be32 tid_flow_qp;
__be32 verbs_qp;
};
/*
 * Extended-header layout of a TID RDMA WRITE DATA packet (the w_data
 * member of the tid_rdma union in union ib_ehdrs): nine 32-bit words,
 * of which only the KDETH words and verbs_qp are named.
 * This is a wire format: field order and widths must not change.
 */
struct tid_rdma_write_data {
__le32 kdeth0; /* KDETH words are little-endian */
__le32 kdeth1;
__be32 reserved[6];
__be32 verbs_qp;
};
/*
 * Extended-header layout of a TID RDMA RESYNC packet (the resync member
 * of the tid_rdma union in union ib_ehdrs).  The field layout is the
 * same as struct tid_rdma_write_data.
 * This is a wire format: field order and widths must not change.
 */
struct tid_rdma_resync {
__le32 kdeth0; /* KDETH words are little-endian */
__le32 kdeth1;
__be32 reserved[6];
__be32 verbs_qp;
};
/*
 * Extended-header layout of a TID RDMA ACK packet (the ack member of
 * the tid_rdma union in union ib_ehdrs): nine 32-bit words.
 * This is a wire format: field order and widths must not change.
 */
struct tid_rdma_ack {
__le32 kdeth0; /* KDETH words are little-endian */
__le32 kdeth1;
__be32 aeth;
__be32 reserved[2];
__be32 tid_flow_psn;
__be32 verbs_psn;
__be32 tid_flow_qp;
__be32 verbs_qp;
};
/*
* TID RDMA Opcodes
*/
#define IB_OPCODE_TID_RDMA 0xe0
/*
 * TID RDMA opcode enumeration.
 *
 * The first eight enumerators are the per-operation sub-opcodes (0x0 to
 * 0x7).  The IB_OPCODE(TID_RDMA, <op>) entries then declare the full
 * opcodes referenced elsewhere as IB_OPCODE_TID_RDMA_<op> (see TID_OP()).
 * NOTE(review): IB_OPCODE() is defined outside this header; presumably
 * it adds the sub-opcode to the IB_OPCODE_TID_RDMA base -- confirm.
 */
enum {
IB_OPCODE_WRITE_REQ = 0x0,
IB_OPCODE_WRITE_RESP = 0x1,
IB_OPCODE_WRITE_DATA = 0x2,
IB_OPCODE_WRITE_DATA_LAST = 0x3,
IB_OPCODE_READ_REQ = 0x4,
IB_OPCODE_READ_RESP = 0x5,
IB_OPCODE_RESYNC = 0x6,
IB_OPCODE_ACK = 0x7,
IB_OPCODE(TID_RDMA, WRITE_REQ),
IB_OPCODE(TID_RDMA, WRITE_RESP),
IB_OPCODE(TID_RDMA, WRITE_DATA),
IB_OPCODE(TID_RDMA, WRITE_DATA_LAST),
IB_OPCODE(TID_RDMA, READ_REQ),
IB_OPCODE(TID_RDMA, READ_RESP),
IB_OPCODE(TID_RDMA, RESYNC),
IB_OPCODE(TID_RDMA, ACK),
};
#define TID_OP(x) IB_OPCODE_TID_RDMA_##x
@ -47,6 +102,7 @@ enum {
* low level drivers. Two of those are used but renamed
* to be more descriptive.
*/
#define IB_WR_TID_RDMA_WRITE IB_WR_RESERVED1
#define IB_WR_TID_RDMA_READ IB_WR_RESERVED2
#endif /* TID_RDMA_DEFS_H */