RDMA/hns: Support multi hop addressing for PBL in hip08

The block base address in the MR can be retrieved by the block number
which is calculated with the VA in the SGE and MTPT. In hip08, the PBL
supports multi hop addressing to retrieve the block base address by
the block number.

This patch is to add the interfaces in the MR to support multi hop
addressing for the PBL.

Signed-off-by: Shaobo Xu <xushaobo2@huawei.com>
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
Wei Hu(Xavier) 2017-08-30 17:23:10 +08:00 committed by Doug Ledford
parent 9766edc34e
commit ff795f7178
4 changed files with 418 additions and 24 deletions

View file

@ -267,6 +267,19 @@ struct hns_roce_mr {
int type; /* MR's register type */
u64 *pbl_buf;/* MR's PBL space */
dma_addr_t pbl_dma_addr; /* MR's PBL space PA */
u32 pbl_size;/* PA number in the PBL */
u64 pbl_ba;/* page table address */
u32 l0_chunk_last_num;/* L0 last number */
u32 l1_chunk_last_num;/* L1 last number */
u64 **pbl_bt_l2;/* PBL BT L2 */
u64 **pbl_bt_l1;/* PBL BT L1 */
u64 *pbl_bt_l0;/* PBL BT L0 */
dma_addr_t *pbl_l2_dma_addr;/* PBL BT L2 dma addr */
dma_addr_t *pbl_l1_dma_addr;/* PBL BT L1 dma addr */
dma_addr_t pbl_l0_dma_addr;/* PBL BT L0 dma addr */
u32 pbl_ba_pg_sz;/* BT chunk page size */
u32 pbl_buf_pg_sz;/* buf chunk page size */
u32 pbl_hop_num;/* multi-hop number */
};
struct hns_roce_mr_table {
@ -514,6 +527,9 @@ struct hns_roce_caps {
int qpc_entry_sz;
int irrl_entry_sz;
int cqc_entry_sz;
u32 pbl_ba_pg_sz;
u32 pbl_buf_pg_sz;
u32 pbl_hop_num;
int aeqe_depth;
int ceqe_depth[HNS_ROCE_COMP_VEC_NUM];
enum ib_mtu max_mtu;

View file

@ -613,6 +613,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->mpt_ba_pg_sz = 0;
caps->mpt_buf_pg_sz = 0;
caps->mpt_hop_num = HNS_ROCE_CONTEXT_HOP_NUM;
caps->pbl_ba_pg_sz = 0;
caps->pbl_buf_pg_sz = 0;
caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM;
caps->mtt_ba_pg_sz = 0;
caps->mtt_buf_pg_sz = 0;
caps->mtt_hop_num = HNS_ROCE_MTT_HOP_NUM;

View file

@ -75,6 +75,7 @@
#define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_MTT_HOP_NUM 1
#define HNS_ROCE_CQE_HOP_NUM 1
#define HNS_ROCE_PBL_HOP_NUM 2
#define HNS_ROCE_CMD_FLAG_IN_VALID_SHIFT 0
#define HNS_ROCE_CMD_FLAG_OUT_VALID_SHIFT 1

View file

@ -258,6 +258,239 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
}
EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup);
static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr, int err_loop_index,
int loop_i, int loop_j)
{
struct device *dev = hr_dev->dev;
u32 mhop_num;
u32 pbl_bt_sz;
u64 bt_idx;
int i, j;
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
mhop_num = hr_dev->caps.pbl_hop_num;
i = loop_i;
j = loop_j;
if (mhop_num == 3 && err_loop_index == 2) {
for (; i >= 0; i--) {
dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
mr->pbl_l1_dma_addr[i]);
for (j = 0; j < pbl_bt_sz / 8; j++) {
if (i == loop_i && j >= loop_j)
break;
bt_idx = i * pbl_bt_sz / 8 + j;
dma_free_coherent(dev, pbl_bt_sz,
mr->pbl_bt_l2[bt_idx],
mr->pbl_l2_dma_addr[bt_idx]);
}
}
} else if (mhop_num == 3 && err_loop_index == 1) {
for (i -= 1; i >= 0; i--) {
dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
mr->pbl_l1_dma_addr[i]);
for (j = 0; j < pbl_bt_sz / 8; j++) {
bt_idx = i * pbl_bt_sz / 8 + j;
dma_free_coherent(dev, pbl_bt_sz,
mr->pbl_bt_l2[bt_idx],
mr->pbl_l2_dma_addr[bt_idx]);
}
}
} else if (mhop_num == 2 && err_loop_index == 1) {
for (i -= 1; i >= 0; i--)
dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
mr->pbl_l1_dma_addr[i]);
} else {
dev_warn(dev, "not support: mhop_num=%d, err_loop_index=%d.",
mhop_num, err_loop_index);
return;
}
dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr);
mr->pbl_bt_l0 = NULL;
mr->pbl_l0_dma_addr = 0;
}
/* PBL multi hop addressing */
static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
struct hns_roce_mr *mr)
{
struct device *dev = hr_dev->dev;
int mr_alloc_done = 0;
int npages_allocated;
int i = 0, j = 0;
u32 pbl_bt_sz;
u32 mhop_num;
u64 pbl_last_bt_num;
u64 pbl_bt_cnt = 0;
u64 bt_idx;
u64 size;
mhop_num = hr_dev->caps.pbl_hop_num;
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
if (mhop_num == HNS_ROCE_HOP_NUM_0)
return 0;
/* hop_num = 1 */
if (mhop_num == 1) {
if (npages > pbl_bt_sz / 8) {
dev_err(dev, "npages %d is larger than buf_pg_sz!",
npages);
return -EINVAL;
}
mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
&(mr->pbl_dma_addr),
GFP_KERNEL);
if (!mr->pbl_buf)
return -ENOMEM;
mr->pbl_size = npages;
mr->pbl_ba = mr->pbl_dma_addr;
mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
return 0;
}
mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
sizeof(*mr->pbl_l1_dma_addr),
GFP_KERNEL);
if (!mr->pbl_l1_dma_addr)
return -ENOMEM;
mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
GFP_KERNEL);
if (!mr->pbl_bt_l1)
goto err_kcalloc_bt_l1;
if (mhop_num == 3) {
mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
sizeof(*mr->pbl_l2_dma_addr),
GFP_KERNEL);
if (!mr->pbl_l2_dma_addr)
goto err_kcalloc_l2_dma;
mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
sizeof(*mr->pbl_bt_l2),
GFP_KERNEL);
if (!mr->pbl_bt_l2)
goto err_kcalloc_bt_l2;
}
/* alloc L0 BT */
mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
&(mr->pbl_l0_dma_addr),
GFP_KERNEL);
if (!mr->pbl_bt_l0)
goto err_dma_alloc_l0;
if (mhop_num == 2) {
/* alloc L1 BT */
for (i = 0; i < pbl_bt_sz / 8; i++) {
if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
size = pbl_bt_sz;
} else {
npages_allocated = i * (pbl_bt_sz / 8);
size = (npages - npages_allocated) * 8;
}
mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
&(mr->pbl_l1_dma_addr[i]),
GFP_KERNEL);
if (!mr->pbl_bt_l1[i]) {
hns_roce_loop_free(hr_dev, mr, 1, i, 0);
goto err_dma_alloc_l0;
}
*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
pbl_bt_cnt++;
if (pbl_bt_cnt >= pbl_last_bt_num)
break;
}
} else if (mhop_num == 3) {
/* alloc L1, L2 BT */
for (i = 0; i < pbl_bt_sz / 8; i++) {
mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
&(mr->pbl_l1_dma_addr[i]),
GFP_KERNEL);
if (!mr->pbl_bt_l1[i]) {
hns_roce_loop_free(hr_dev, mr, 1, i, 0);
goto err_dma_alloc_l0;
}
*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
for (j = 0; j < pbl_bt_sz / 8; j++) {
bt_idx = i * pbl_bt_sz / 8 + j;
if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
size = pbl_bt_sz;
} else {
npages_allocated = bt_idx *
(pbl_bt_sz / 8);
size = (npages - npages_allocated) * 8;
}
mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
dev, size,
&(mr->pbl_l2_dma_addr[bt_idx]),
GFP_KERNEL);
if (!mr->pbl_bt_l2[bt_idx]) {
hns_roce_loop_free(hr_dev, mr, 2, i, j);
goto err_dma_alloc_l0;
}
*(mr->pbl_bt_l1[i] + j) =
mr->pbl_l2_dma_addr[bt_idx];
pbl_bt_cnt++;
if (pbl_bt_cnt >= pbl_last_bt_num) {
mr_alloc_done = 1;
break;
}
}
if (mr_alloc_done)
break;
}
}
mr->l0_chunk_last_num = i + 1;
if (mhop_num == 3)
mr->l1_chunk_last_num = j + 1;
mr->pbl_size = npages;
mr->pbl_ba = mr->pbl_l0_dma_addr;
mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
return 0;
err_dma_alloc_l0:
kfree(mr->pbl_bt_l2);
mr->pbl_bt_l2 = NULL;
err_kcalloc_bt_l2:
kfree(mr->pbl_l2_dma_addr);
mr->pbl_l2_dma_addr = NULL;
err_kcalloc_l2_dma:
kfree(mr->pbl_bt_l1);
mr->pbl_bt_l1 = NULL;
err_kcalloc_bt_l1:
kfree(mr->pbl_l1_dma_addr);
mr->pbl_l1_dma_addr = NULL;
return -ENOMEM;
}
static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
u64 size, u32 access, int npages,
struct hns_roce_mr *mr)
@ -282,16 +515,111 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
mr->type = MR_TYPE_DMA;
mr->pbl_buf = NULL;
mr->pbl_dma_addr = 0;
/* PBL multi-hop addressing parameters */
mr->pbl_bt_l2 = NULL;
mr->pbl_bt_l1 = NULL;
mr->pbl_bt_l0 = NULL;
mr->pbl_l2_dma_addr = NULL;
mr->pbl_l1_dma_addr = NULL;
mr->pbl_l0_dma_addr = 0;
} else {
mr->type = MR_TYPE_MR;
mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
&(mr->pbl_dma_addr),
GFP_KERNEL);
if (!mr->pbl_buf)
return -ENOMEM;
if (!hr_dev->caps.pbl_hop_num) {
mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
&(mr->pbl_dma_addr),
GFP_KERNEL);
if (!mr->pbl_buf)
return -ENOMEM;
} else {
ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
}
}
return 0;
return ret;
}
static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr)
{
struct device *dev = hr_dev->dev;
int npages_allocated;
int npages;
int i, j;
u32 pbl_bt_sz;
u32 mhop_num;
u64 bt_idx;
npages = ib_umem_page_count(mr->umem);
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
mhop_num = hr_dev->caps.pbl_hop_num;
if (mhop_num == HNS_ROCE_HOP_NUM_0)
return;
/* hop_num = 1 */
if (mhop_num == 1) {
dma_free_coherent(dev, (unsigned int)(npages * 8),
mr->pbl_buf, mr->pbl_dma_addr);
return;
}
dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0,
mr->pbl_l0_dma_addr);
if (mhop_num == 2) {
for (i = 0; i < mr->l0_chunk_last_num; i++) {
if (i == mr->l0_chunk_last_num - 1) {
npages_allocated = i * (pbl_bt_sz / 8);
dma_free_coherent(dev,
(npages - npages_allocated) * 8,
mr->pbl_bt_l1[i],
mr->pbl_l1_dma_addr[i]);
break;
}
dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
mr->pbl_l1_dma_addr[i]);
}
} else if (mhop_num == 3) {
for (i = 0; i < mr->l0_chunk_last_num; i++) {
dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
mr->pbl_l1_dma_addr[i]);
for (j = 0; j < pbl_bt_sz / 8; j++) {
bt_idx = i * (pbl_bt_sz / 8) + j;
if ((i == mr->l0_chunk_last_num - 1)
&& j == mr->l1_chunk_last_num - 1) {
npages_allocated = bt_idx *
(pbl_bt_sz / 8);
dma_free_coherent(dev,
(npages - npages_allocated) * 8,
mr->pbl_bt_l2[bt_idx],
mr->pbl_l2_dma_addr[bt_idx]);
break;
}
dma_free_coherent(dev, pbl_bt_sz,
mr->pbl_bt_l2[bt_idx],
mr->pbl_l2_dma_addr[bt_idx]);
}
}
}
kfree(mr->pbl_bt_l1);
kfree(mr->pbl_l1_dma_addr);
mr->pbl_bt_l1 = NULL;
mr->pbl_l1_dma_addr = NULL;
if (mhop_num == 3) {
kfree(mr->pbl_bt_l2);
kfree(mr->pbl_l2_dma_addr);
mr->pbl_bt_l2 = NULL;
mr->pbl_l2_dma_addr = NULL;
}
}
static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
@ -310,10 +638,18 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
if (mr->size != ~0ULL) {
npages = ib_umem_page_count(mr->umem);
dma_free_coherent(dev, (unsigned int)(npages * 8), mr->pbl_buf,
mr->pbl_dma_addr);
if (!hr_dev->caps.pbl_hop_num)
dma_free_coherent(dev, (unsigned int)(npages * 8),
mr->pbl_buf, mr->pbl_dma_addr);
else
hns_roce_mhop_free(hr_dev, mr);
}
if (mr->enabled)
hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
key_to_hw_index(mr->key));
hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
key_to_hw_index(mr->key), BITMAP_NO_RR);
}
@ -501,8 +837,8 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
{
int ret = 0;
struct hns_roce_mr *mr = NULL;
struct hns_roce_mr *mr;
int ret;
mr = kmalloc(sizeof(*mr), GFP_KERNEL);
if (mr == NULL)
@ -571,16 +907,36 @@ out:
return ret;
}
static int hns_roce_ib_umem_write_mr(struct hns_roce_mr *mr,
static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr,
struct ib_umem *umem)
{
int i = 0;
int entry;
struct scatterlist *sg;
int i = 0, j = 0;
int entry;
if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0)
return 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
mr->pbl_buf[i] = ((u64)sg_dma_address(sg)) >> 12;
i++;
if (!hr_dev->caps.pbl_hop_num) {
mr->pbl_buf[i] = ((u64)sg_dma_address(sg)) >> 12;
i++;
} else if (hr_dev->caps.pbl_hop_num == 1) {
mr->pbl_buf[i] = sg_dma_address(sg);
i++;
} else {
if (hr_dev->caps.pbl_hop_num == 2)
mr->pbl_bt_l1[i][j] = sg_dma_address(sg);
else if (hr_dev->caps.pbl_hop_num == 3)
mr->pbl_bt_l2[i][j] = sg_dma_address(sg);
j++;
if (j >= (PAGE_SIZE / 8)) {
i++;
j = 0;
}
}
}
/* Memory barrier */
@ -595,9 +951,11 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct device *dev = hr_dev->dev;
struct hns_roce_mr *mr = NULL;
int ret = 0;
int n = 0;
struct hns_roce_mr *mr;
int bt_size;
int ret;
int n;
int i;
mr = kmalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
@ -618,11 +976,27 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_umem;
}
if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
dev_err(dev, " MR len %lld err. MR is limited to 4G at most!\n",
length);
ret = -EINVAL;
goto err_umem;
if (!hr_dev->caps.pbl_hop_num) {
if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
dev_err(dev,
" MR len %lld err. MR is limited to 4G at most!\n",
length);
ret = -EINVAL;
goto err_umem;
}
} else {
int pbl_size = 1;
bt_size = (1 << PAGE_SHIFT) / 8;
for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
pbl_size *= bt_size;
if (n > pbl_size) {
dev_err(dev,
" MR len %lld err. MR page num is limited to %d!\n",
length, pbl_size);
ret = -EINVAL;
goto err_umem;
}
}
ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
@ -630,7 +1004,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (ret)
goto err_umem;
ret = hns_roce_ib_umem_write_mr(mr, mr->umem);
ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
if (ret)
goto err_mr;