IB/rdmavt: Add create queue pair functionality

Add create queue pair verbs call as well as supporting functions.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Harish Chegondi <harish.chegondi@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
Dennis Dalessandro 2016-01-22 12:50:17 -08:00 committed by Doug Ledford
parent 050eb7fbe0
commit 515667f8f8
3 changed files with 413 additions and 23 deletions

View file

@ -47,8 +47,11 @@
#include <linux/bitops.h>
#include <linux/lockdep.h>
#include "vt.h"
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include "qp.h"
#include "vt.h"
static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map)
{
@ -151,7 +154,10 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi)
* If driver is not doing any QP allocation then make sure it is
* providing the necessary QP functions.
*/
if (!rdi->driver_f.free_all_qps)
if (!rdi->driver_f.free_all_qps ||
!rdi->driver_f.qp_priv_alloc ||
!rdi->driver_f.qp_priv_free ||
!rdi->driver_f.notify_qp_reset)
return -EINVAL;
/* allocate parent object */
@ -178,7 +184,9 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi)
if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table))
goto fail_table;
return ret;
spin_lock_init(&rdi->n_qps_lock);
return 0;
fail_table:
kfree(rdi->qp_dev->qp_table);
@ -197,31 +205,29 @@ no_qp_table:
* There should not be any QPs still in use.
* Free memory for table.
*/
static unsigned free_all_qps(struct rvt_dev_info *rdi)
static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
{
unsigned long flags;
struct rvt_qp *qp;
unsigned n, qp_inuse = 0;
spinlock_t *ql; /* work around too long line below */
rdi->driver_f.free_all_qps(rdi);
if (rdi->driver_f.free_all_qps)
qp_inuse = rdi->driver_f.free_all_qps(rdi);
if (!rdi->qp_dev)
return 0;
return qp_inuse;
ql = &rdi->qp_dev->qpt_lock;
spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
spin_lock_irqsave(ql, flags);
for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
lockdep_is_held(ql));
RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
qp = rcu_dereference_protected(qp->next,
lockdep_is_held(ql));
while (qp) {
for (; qp; qp = rcu_dereference_protected(qp->next,
lockdep_is_held(ql)))
qp_inuse++;
qp = rcu_dereference_protected(qp->next,
lockdep_is_held(ql));
}
}
spin_unlock_irqrestore(ql, flags);
synchronize_rcu();
@ -230,26 +236,190 @@ static unsigned free_all_qps(struct rvt_dev_info *rdi)
void rvt_qp_exit(struct rvt_dev_info *rdi)
{
u32 qps_inuse = free_all_qps(rdi);
u32 qps_inuse = rvt_free_all_qps(rdi);
qps_inuse = free_all_qps(rdi);
if (qps_inuse)
rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
qps_inuse);
if (!rdi->qp_dev)
return;
if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER)
return; /* driver did the qp init so nothing else to do */
kfree(rdi->qp_dev->qp_table);
free_qpn_table(&rdi->qp_dev->qpn_table);
kfree(rdi->qp_dev);
}
/*
 * Turn a (bitmap page, bit offset) pair back into a queue pair number.
 * The page is identified by the position of @map inside qpt->map; each
 * page accounts for RVT_BITS_PER_PAGE consecutive QPNs.
 */
static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
			      struct rvt_qpn_map *map, unsigned off)
{
	return off + RVT_BITS_PER_PAGE * (map - &qpt->map[0]);
}
/**
 * alloc_qpn - allocate a queue pair number
 * @rdi: the rvt device; supplies the optional driver alloc_qpn hook and
 *       dparms.qos_shift
 * @qpt: the QPN table (bitmap pages, SMI/GSI flags, last/incr cursor)
 * @type: the QP type being created
 * @port: the port number, used to pick the SMI/GSI flag bit
 *
 * If the driver provides its own alloc_qpn hook it is used exclusively and
 * its result is returned unchanged.
 *
 * Otherwise IB_QPT_SMI and IB_QPT_GSI get QPN zero and one respectively,
 * guarded by a per-port bit in qpt->flags so each can only be handed out
 * once per port. Every other type takes the next free bit in the QPN
 * bitmap, starting from qpt->last + qpt->incr.
 *
 * Return: the QPN (>= 0) on success, -EINVAL if the SMI/GSI QPN for @port
 * is already in use, -ENOMEM if no bitmap page could be obtained or no
 * free QPN was found.
 */
static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
		     enum ib_qp_type type, u8 port)
{
	u32 i, offset, max_scan, qpn;
	struct rvt_qpn_map *map;
	int ret;	/* signed: carries either a QPN or a negative errno */

	if (rdi->driver_f.alloc_qpn)
		return rdi->driver_f.alloc_qpn(rdi, qpt, type, port);

	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
		unsigned n;

		/* SMI -> QPN 0, GSI -> QPN 1; two flag bits per port */
		ret = type == IB_QPT_GSI;
		n = 1 << (ret + 2 * (port - 1));
		spin_lock(&qpt->lock);
		if (qpt->flags & n)
			ret = -EINVAL;
		else
			qpt->flags |= n;
		spin_unlock(&qpt->lock);
		goto bail;
	}

	qpn = qpt->last + qpt->incr;
	/* ran off the end: restart at incr with bit 0 inverted */
	if (qpn >= RVT_QPN_MAX)
		qpn = qpt->incr | ((qpt->last & 1) ^ 1);
	/* offset carries bit 0 */
	offset = qpn & RVT_BITS_PER_PAGE_MASK;
	map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
	max_scan = qpt->nmaps - !offset;
	for (i = 0;;) {
		if (unlikely(!map->page)) {
			get_map_page(qpt, map);
			/* still no page: give up with -ENOMEM */
			if (unlikely(!map->page))
				break;
		}
		do {
			if (!test_and_set_bit(offset, map->page)) {
				qpt->last = qpn;
				ret = qpn;
				goto bail;
			}
			offset += qpt->incr;
			/*
			 * This qpn might be bogus if offset >= BITS_PER_PAGE.
			 * That is OK. It gets re-assigned below
			 */
			qpn = mk_qpn(qpt, map, offset);
		} while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
		/*
		 * In order to keep the number of pages allocated to a
		 * minimum, we scan all the existing pages before increasing
		 * the size of the bitmap table.
		 */
		if (++i > max_scan) {
			if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
				break;
			map = &qpt->map[qpt->nmaps++];
			/* start at incr with current bit 0 */
			offset = qpt->incr | (offset & 1);
		} else if (map < &qpt->map[qpt->nmaps]) {
			++map;
			/* start at incr with current bit 0 */
			offset = qpt->incr | (offset & 1);
		} else {
			map = &qpt->map[0];
			/* wrap to first map page, invert bit 0 */
			offset = qpt->incr | ((offset & 1) ^ 1);
		}
		/* there can be no bits at shift and below */
		WARN_ON(offset & (rdi->dparms.qos_shift - 1));
		qpn = mk_qpn(qpt, map, offset);
	}

	ret = -ENOMEM;

bail:
	return ret;
}
/*
 * Release a QPN by clearing its bit in the QPN bitmap.
 * Nothing is done when the bitmap page covering @qpn has no page attached.
 */
static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
{
	struct rvt_qpn_map *map = &qpt->map[qpn / RVT_BITS_PER_PAGE];

	if (!map->page)
		return;

	clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
}
/**
 * reset_qp - put a QP back into its reset state
 * @rdi: the rvt device; its notify_qp_reset driver hook is invoked
 * @qp: the QP to reset
 * @type: the QP type, selects the RC or UC initial opcode state
 *
 * Clears the connection attributes, lets the driver reset its own state
 * through notify_qp_reset(), then reinitializes the send, receive and
 * ack bookkeeping. Only RVT_S_SIGNAL_REQ_WR survives in s_flags.
 */
static void reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
		     enum ib_qp_type type)
{
	/* connection attributes, cleared before the driver is told */
	qp->remote_qpn = 0;
	qp->qkey = 0;
	qp->qp_access_flags = 0;

	/* driver hook for whatever a new/reset qp needs on its side */
	rdi->driver_f.notify_qp_reset(qp);

	/* send side: drop every flag except the signalling mode */
	qp->s_flags &= RVT_S_SIGNAL_REQ_WR;
	qp->s_hdrwords = 0;
	qp->s_wqe = NULL;
	qp->s_draining = 0;
	qp->s_mig_state = IB_MIG_MIGRATED;

	/* packet and message sequence numbers */
	qp->s_next_psn = 0;
	qp->s_last_psn = 0;
	qp->s_sending_psn = 0;
	qp->s_sending_hpsn = 0;
	qp->s_psn = 0;
	qp->r_psn = 0;
	qp->r_msn = 0;

	/* send queue indices */
	qp->s_head = 0;
	qp->s_tail = 0;
	qp->s_cur = 0;
	qp->s_acked = 0;
	qp->s_last = 0;
	qp->s_ssn = 1;
	qp->s_lsn = 0;

	/* initial opcode state: RC QPs use the RC opcode, all others UC */
	switch (type) {
	case IB_QPT_RC:
		qp->s_state = IB_OPCODE_RC_SEND_LAST;
		qp->r_state = IB_OPCODE_RC_SEND_LAST;
		break;
	default:
		qp->s_state = IB_OPCODE_UC_SEND_LAST;
		qp->r_state = IB_OPCODE_UC_SEND_LAST;
		break;
	}
	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
	qp->r_nak_state = 0;
	qp->r_aflags = 0;
	qp->r_flags = 0;

	/* ack queue */
	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
	qp->r_head_ack_queue = 0;
	qp->s_tail_ack_queue = 0;
	qp->s_num_rd_atomic = 0;

	/* receive side; the work queue is only touched when one is attached */
	qp->r_sge.num_sge = 0;
	if (!qp->r_rq.wq)
		return;

	qp->r_rq.wq->head = 0;
	qp->r_rq.wq->tail = 0;
}
/**
* rvt_create_qp - create a queue pair for a device
* @ibpd: the protection domain whose device we create the queue pair for
* @init_attr: the attributes of the queue pair
* @udata: user data for libibverbs.so
*
* Queue pair creation is mostly an rvt issue. However, drivers have their own
* unique idea of what queue pair numbers mean. For instance there is a reserved
* range for PSM.
*
* Returns the queue pair on success, otherwise returns an errno.
*
* Called by the ib_create_qp() core verbs function.
@ -258,15 +428,226 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
struct rvt_qp *qp;
int err;
struct rvt_swqe *swq = NULL;
size_t sz;
size_t sg_list_sz;
struct ib_qp *ret = ERR_PTR(-ENOMEM);
struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
void *priv = NULL;
if (!rdi)
return ERR_PTR(-EINVAL);
if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
init_attr->create_flags)
return ERR_PTR(-EINVAL);
/* Check receive queue parameters if no SRQ is specified. */
if (!init_attr->srq) {
if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
return ERR_PTR(-EINVAL);
if (init_attr->cap.max_send_sge +
init_attr->cap.max_send_wr +
init_attr->cap.max_recv_sge +
init_attr->cap.max_recv_wr == 0)
return ERR_PTR(-EINVAL);
}
switch (init_attr->qp_type) {
case IB_QPT_SMI:
case IB_QPT_GSI:
if (init_attr->port_num == 0 ||
init_attr->port_num > ibpd->device->phys_port_cnt)
return ERR_PTR(-EINVAL);
case IB_QPT_UC:
case IB_QPT_RC:
case IB_QPT_UD:
sz = sizeof(struct rvt_sge) *
init_attr->cap.max_send_sge +
sizeof(struct rvt_swqe);
swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
if (!swq)
return ERR_PTR(-ENOMEM);
sz = sizeof(*qp);
sg_list_sz = 0;
if (init_attr->srq) {
struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);
if (srq->rq.max_sge > 1)
sg_list_sz = sizeof(*qp->r_sg_list) *
(srq->rq.max_sge - 1);
} else if (init_attr->cap.max_recv_sge > 1)
sg_list_sz = sizeof(*qp->r_sg_list) *
(init_attr->cap.max_recv_sge - 1);
qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
if (!qp)
goto bail_swq;
RCU_INIT_POINTER(qp->next, NULL);
/*
* Driver needs to set up its private QP structure and do any
* initialization that is needed.
*/
priv = rdi->driver_f.qp_priv_alloc(rdi, qp);
if (!priv)
goto bail_qp;
qp->priv = priv;
qp->timeout_jiffies =
usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
1000UL);
if (init_attr->srq) {
sz = 0;
} else {
qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
sizeof(struct rvt_rwqe);
qp->r_rq.wq = vmalloc_user(sizeof(struct rvt_rwq) +
qp->r_rq.size * sz);
if (!qp->r_rq.wq)
goto bail_driver_priv;
}
/*
* ib_create_qp() will initialize qp->ibqp
* except for qp->ibqp.qp_num.
*/
spin_lock_init(&qp->r_lock);
spin_lock_init(&qp->s_lock);
spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0);
init_waitqueue_head(&qp->wait);
init_timer(&qp->s_timer);
qp->s_timer.data = (unsigned long)qp;
INIT_LIST_HEAD(&qp->rspwait);
qp->state = IB_QPS_RESET;
qp->s_wq = swq;
qp->s_size = init_attr->cap.max_send_wr + 1;
qp->s_max_sge = init_attr->cap.max_send_sge;
if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
qp->s_flags = RVT_S_SIGNAL_REQ_WR;
err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
init_attr->qp_type,
init_attr->port_num);
if (err < 0) {
ret = ERR_PTR(err);
goto bail_rq_wq;
}
qp->ibqp.qp_num = err;
qp->port_num = init_attr->port_num;
reset_qp(rdi, qp, init_attr->qp_type);
break;
default:
/* Don't support raw QPs */
return ERR_PTR(-EINVAL);
}
init_attr->cap.max_inline_data = 0;
/*
* Queue pair creation is mostly an rvt issue. However, drivers have
* their own unique idea of what queue pare numbers mean. For instance
* there is a reserved range for PSM.
*
* VI-DRIVER-API: make_qpn()
* Returns a valid QPN for verbs to use
* Return the address of the RWQ as the offset to mmap.
* See hfi1_mmap() for details.
*/
return ERR_PTR(-EOPNOTSUPP);
if (udata && udata->outlen >= sizeof(__u64)) {
if (!qp->r_rq.wq) {
__u64 offset = 0;
err = ib_copy_to_udata(udata, &offset,
sizeof(offset));
if (err) {
ret = ERR_PTR(err);
goto bail_qpn;
}
} else {
u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
qp->ip = rvt_create_mmap_info(rdi, s,
ibpd->uobject->context,
qp->r_rq.wq);
if (!qp->ip) {
ret = ERR_PTR(-ENOMEM);
goto bail_qpn;
}
err = ib_copy_to_udata(udata, &qp->ip->offset,
sizeof(qp->ip->offset));
if (err) {
ret = ERR_PTR(err);
goto bail_ip;
}
}
}
spin_lock(&rdi->n_qps_lock);
if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
spin_unlock(&rdi->n_qps_lock);
ret = ERR_PTR(-ENOMEM);
goto bail_ip;
}
rdi->n_qps_allocated++;
spin_unlock(&rdi->n_qps_lock);
if (qp->ip) {
spin_lock_irq(&rdi->pending_lock);
list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps);
spin_unlock_irq(&rdi->pending_lock);
}
ret = &qp->ibqp;
/*
* We have our QP and it's good, now keep track of what types of opcodes
* can be processed on this QP. We do this by keeping track of what the
* 3 high order bits of the opcode are.
*/
switch (init_attr->qp_type) {
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_UD:
qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK;
break;
case IB_QPT_RC:
qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK;
break;
case IB_QPT_UC:
qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK;
break;
default:
ret = ERR_PTR(-EINVAL);
goto bail_ip;
}
return ret;
bail_ip:
kref_put(&qp->ip->ref, rvt_release_mmap_info);
bail_qpn:
free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
bail_rq_wq:
vfree(qp->r_rq.wq);
bail_driver_priv:
rdi->driver_f.qp_priv_free(rdi, qp);
bail_qp:
kfree(qp);
bail_swq:
vfree(swq);
return ret;
}
/**

View file

@ -362,6 +362,7 @@ void rvt_unregister_device(struct rvt_dev_info *rdi)
ib_unregister_device(&rdi->ibdev);
rvt_mr_exit(rdi);
rvt_qp_exit(rdi);
}
EXPORT_SYMBOL(rvt_unregister_device);

View file

@ -222,7 +222,10 @@ struct rvt_driver_provided {
int (*port_callback)(struct ib_device *, u8, struct kobject *);
const char * (*get_card_name)(struct rvt_dev_info *rdi);
struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi);
void (*free_all_qps)(struct rvt_dev_info *rdi);
unsigned (*free_all_qps)(struct rvt_dev_info *rdi);
void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void (*notify_qp_reset)(struct rvt_qp *qp);
/*--------------------*/
/* Optional functions */
@ -230,6 +233,8 @@ struct rvt_driver_provided {
int (*check_ah)(struct ib_device *, struct ib_ah_attr *);
void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *,
struct rvt_ah *);
int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
enum ib_qp_type type, u8 port);
};
struct rvt_dev_info {
@ -262,7 +267,10 @@ struct rvt_dev_info {
int flags;
struct rvt_ibport **ports;
/* QP */
struct rvt_qp_ibdev *qp_dev;
u32 n_qps_allocated; /* number of QPs allocated for device */
spinlock_t n_qps_lock; /* keep track of number of qps */
/* memory maps */
struct list_head pending_mmaps;