[SCSI] lpfc 8.3.33: Parallelize SLI-4 Q distribution

Commonize SLI-3/4 Ring/Queue framework, to keep SLI-3 compatibility
Parallelize SLI-4 Q distribution - to use multiple posting/completion queues

Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
This commit is contained in:
James Smart 2012-08-03 12:35:54 -04:00 committed by James Bottomley
parent 027140eab7
commit 2a76a28314
6 changed files with 77 additions and 36 deletions

View file

@ -732,7 +732,7 @@ struct lpfc_hba {
uint32_t hbq_count; /* Count of configured HBQs */
struct hbq_s hbqs[LPFC_MAX_HBQS]; /* local copy of hbq indicies */
uint32_t fcp_qidx; /* next work queue to post work to */
atomic_t fcp_qidx; /* next work queue to post work to */
unsigned long pci_bar0_map; /* Physical address for PCI BAR0 */
unsigned long pci_bar1_map; /* Physical address for PCI BAR1 */

View file

@ -1188,8 +1188,8 @@ typedef struct {
*/
/* Number of rings currently used and available. */
#define MAX_CONFIGURED_RINGS 3
#define MAX_RINGS 4
#define MAX_SLI3_CONFIGURED_RINGS 3
#define MAX_SLI3_RINGS 4
/* IOCB / Mailbox is owned by FireFly */
#define OWN_CHIP 1
@ -2993,7 +2993,7 @@ typedef struct _PCB {
uint32_t pgpAddrLow;
uint32_t pgpAddrHigh;
SLI2_RDSC rdsc[MAX_RINGS];
SLI2_RDSC rdsc[MAX_SLI3_RINGS];
} PCB_t;
/* NEW_FEATURE */
@ -3103,18 +3103,18 @@ struct lpfc_pgp {
struct sli2_desc {
uint32_t unused1[16];
struct lpfc_hgp host[MAX_RINGS];
struct lpfc_pgp port[MAX_RINGS];
struct lpfc_hgp host[MAX_SLI3_RINGS];
struct lpfc_pgp port[MAX_SLI3_RINGS];
};
struct sli3_desc {
struct lpfc_hgp host[MAX_RINGS];
struct lpfc_hgp host[MAX_SLI3_RINGS];
uint32_t reserved[8];
uint32_t hbq_put[16];
};
struct sli3_pgp {
struct lpfc_pgp port[MAX_RINGS];
struct lpfc_pgp port[MAX_SLI3_RINGS];
uint32_t hbq_get[16];
};

View file

@ -4551,6 +4551,13 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
phba->cfg_sg_seg_cnt = LPFC_DEFAULT_MENLO_SG_SEG_CNT;
}
if (!phba->sli.ring)
phba->sli.ring = (struct lpfc_sli_ring *)
kzalloc(LPFC_SLI3_MAX_RING *
sizeof(struct lpfc_sli_ring), GFP_KERNEL);
if (!phba->sli.ring)
return -ENOMEM;
/*
* Since the sg_tablesize is module parameter, the sg_dma_buf_size
* used to create the sg_dma_buf_pool must be dynamically calculated.
@ -4709,6 +4716,16 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
if (phba->cfg_enable_bg)
sges_per_segment = 2;
/*
* For SLI4, instead of using ring 0 (LPFC_FCP_RING) for FCP commands
* we will associate a new ring, for each FCP fastpath EQ/CQ/WQ tuple.
*/
if (!phba->sli.ring)
phba->sli.ring = kzalloc(
(LPFC_SLI3_MAX_RING + phba->cfg_fcp_eq_count) *
sizeof(struct lpfc_sli_ring), GFP_KERNEL);
if (!phba->sli.ring)
return -ENOMEM;
/*
* Since the sg_tablesize is module parameter, the sg_dma_buf_size
* used to create the sg_dma_buf_pool must be dynamically calculated.
@ -5555,6 +5572,10 @@ lpfc_hba_free(struct lpfc_hba *phba)
/* Release the driver assigned board number */
idr_remove(&lpfc_hba_index, phba->brd_no);
/* Free memory allocated with sli rings */
kfree(phba->sli.ring);
phba->sli.ring = NULL;
kfree(phba);
return;
}
@ -6924,6 +6945,8 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
int
lpfc_sli4_queue_setup(struct lpfc_hba *phba)
{
struct lpfc_sli *psli = &phba->sli;
struct lpfc_sli_ring *pring;
int rc = -ENOMEM;
int fcp_eqidx, fcp_cqidx, fcp_wqidx;
int fcp_cq_index = 0;
@ -7107,6 +7130,12 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
"rc = 0x%x\n", rc);
goto out_destroy_mbx_wq;
}
/* Bind this WQ to the ELS ring */
pring = &psli->ring[LPFC_ELS_RING];
pring->sli.sli4.wqp = (void *)phba->sli4_hba.els_wq;
phba->sli4_hba.els_cq->pring = pring;
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"2590 ELS WQ setup: wq-id=%d, parent cq-id=%d\n",
phba->sli4_hba.els_wq->queue_id,
@ -7137,6 +7166,12 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
"WQ (%d), rc = 0x%x\n", fcp_wqidx, rc);
goto out_destroy_fcp_wq;
}
/* Bind this WQ to the next FCP ring */
pring = &psli->ring[MAX_SLI3_CONFIGURED_RINGS + fcp_wqidx];
pring->sli.sli4.wqp = (void *)phba->sli4_hba.fcp_wq[fcp_wqidx];
phba->sli4_hba.fcp_cq[fcp_cq_index]->pring = pring;
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"2591 FCP WQ setup: wq[%d]-id=%d, "
"parent cq[%d]-id=%d\n",

View file

@ -7796,14 +7796,14 @@ lpfc_sli4_bpl2sgl(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq,
*
* Return: index into SLI4 fast-path FCP queue index.
**/
static uint32_t
static inline uint32_t
lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba)
{
++phba->fcp_qidx;
if (phba->fcp_qidx >= phba->cfg_fcp_wq_count)
phba->fcp_qidx = 0;
int i;
return phba->fcp_qidx;
i = atomic_add_return(1, &phba->fcp_qidx);
i = (i % phba->cfg_fcp_wq_count);
return i;
}
/**
@ -8323,16 +8323,6 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number,
if ((piocb->iocb_flag & LPFC_IO_FCP) ||
(piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
/*
* For FCP command IOCB, get a new WQ index to distribute
* WQE across the WQsr. On the other hand, for abort IOCB,
* it carries the same WQ index to the original command
* IOCB.
*/
if (piocb->iocb_flag & LPFC_IO_FCP)
piocb->fcp_wqidx = lpfc_sli4_scmd_to_wqidx_distr(phba);
if (unlikely(!phba->sli4_hba.fcp_wq))
return IOCB_ERROR;
if (lpfc_sli4_wq_put(phba->sli4_hba.fcp_wq[piocb->fcp_wqidx],
&wqe))
return IOCB_ERROR;
@ -8413,11 +8403,18 @@ int
lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
struct lpfc_iocbq *piocb, uint32_t flag)
{
struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
struct lpfc_sli_ring *pring;
unsigned long iflags;
int rc;
int rc, idx;
if (phba->sli_rev == LPFC_SLI_REV4) {
if (piocb->iocb_flag & LPFC_IO_FCP) {
if (unlikely(!phba->sli4_hba.fcp_wq))
return IOCB_ERROR;
idx = lpfc_sli4_scmd_to_wqidx_distr(phba);
piocb->fcp_wqidx = idx;
ring_number = MAX_SLI3_CONFIGURED_RINGS + idx;
}
pring = &phba->sli.ring[ring_number];
spin_lock_irqsave(&pring->ring_lock, iflags);
rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
@ -8712,7 +8709,9 @@ lpfc_sli_setup(struct lpfc_hba *phba)
struct lpfc_sli *psli = &phba->sli;
struct lpfc_sli_ring *pring;
psli->num_rings = MAX_CONFIGURED_RINGS;
psli->num_rings = MAX_SLI3_CONFIGURED_RINGS;
if (phba->sli_rev == LPFC_SLI_REV4)
psli->num_rings += phba->cfg_fcp_eq_count;
psli->sli_flag = 0;
psli->fcp_ring = LPFC_FCP_RING;
psli->next_ring = LPFC_FCP_NEXT_RING;
@ -11191,6 +11190,7 @@ lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe)
/**
* lpfc_sli4_sp_handle_els_wcqe - Handle els work-queue completion event
* @phba: Pointer to HBA context object.
* @cq: Pointer to associated CQ
* @wcqe: Pointer to work-queue completion queue entry.
*
* This routine handles an ELS work-queue completion event.
@ -11198,12 +11198,12 @@ lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe)
* Return: true if work posted to worker thread, otherwise false.
**/
static bool
lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba,
lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
struct lpfc_wcqe_complete *wcqe)
{
struct lpfc_iocbq *irspiocbq;
unsigned long iflags;
struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
struct lpfc_sli_ring *pring = cq->pring;
/* Get an irspiocbq for later ELS response processing use */
irspiocbq = lpfc_sli_get_iocbq(phba);
@ -11408,7 +11408,7 @@ lpfc_sli4_sp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
case CQE_CODE_COMPL_WQE:
/* Process the WQ/RQ complete event */
phba->last_completion_time = jiffies;
workposted = lpfc_sli4_sp_handle_els_wcqe(phba,
workposted = lpfc_sli4_sp_handle_els_wcqe(phba, cq,
(struct lpfc_wcqe_complete *)&cqevt);
break;
case CQE_CODE_RELEASE_WQE:
@ -11540,16 +11540,18 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe)
/**
* lpfc_sli4_fp_handle_fcp_wcqe - Process fast-path work queue completion entry
* @eqe: Pointer to fast-path completion queue entry.
* @phba: Pointer to HBA context object.
* @cq: Pointer to associated CQ
* @wcqe: Pointer to work-queue completion queue entry.
*
* This routine process a fast-path work queue completion entry from fast-path
* event queue for FCP command response completion.
**/
static void
lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba,
lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
struct lpfc_wcqe_complete *wcqe)
{
struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_FCP_RING];
struct lpfc_sli_ring *pring = cq->pring;
struct lpfc_iocbq *cmdiocbq;
struct lpfc_iocbq irspiocbq;
unsigned long iflags;
@ -11667,7 +11669,7 @@ lpfc_sli4_fp_handle_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
cq->CQ_wq++;
/* Process the WQ complete event */
phba->last_completion_time = jiffies;
lpfc_sli4_fp_handle_fcp_wcqe(phba,
lpfc_sli4_fp_handle_fcp_wcqe(phba, cq,
(struct lpfc_wcqe_complete *)&wcqe);
break;
case CQE_CODE_RELEASE_WQE:

View file

@ -131,7 +131,9 @@ typedef struct lpfcMboxq {
#define LPFC_MAX_RING_MASK 5 /* max num of rctl/type masks allowed per
ring */
#define LPFC_MAX_RING 4 /* max num of SLI rings used by driver */
#define LPFC_SLI3_MAX_RING 4 /* Max num of SLI3 rings used by driver.
For SLI4, an additional ring for each
FCP WQ will be allocated. */
struct lpfc_sli_ring;
@ -172,7 +174,7 @@ struct lpfc_sli3_ring {
};
struct lpfc_sli4_ring {
void *wqp; /* Pointer to associated WQ */
struct lpfc_queue *wqp; /* Pointer to associated WQ */
};
@ -284,7 +286,7 @@ struct lpfc_sli {
#define LPFC_MENLO_MAINT 0x1000 /* need for menl fw download */
#define LPFC_SLI_ASYNC_MBX_BLK 0x2000 /* Async mailbox is blocked */
struct lpfc_sli_ring ring[LPFC_MAX_RING];
struct lpfc_sli_ring *ring;
int fcp_ring; /* ring used for FCP initiator commands */
int next_ring;

View file

@ -142,6 +142,8 @@ struct lpfc_queue {
uint32_t host_index; /* The host's index for putting or getting */
uint32_t hba_index; /* The last known hba index for get or put */
struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */
/* For q stats */
uint32_t q_cnt_1;
uint32_t q_cnt_2;