scsi: ufs: Improve UFS fatal error handling
Error handling in the UFS driver is broken and resets the host controller for fatal errors without re-initialization. Correct the fatal error handling sequence according to the UFS Host Controller Interface (HCI) v1.1 specification. o Processed requests which are completed with or without error are reported to the SCSI layer, and any pending commands that are not started are aborted in the controller and re-queued into the SCSI mid-layer queue. o Upon determining a fatal error condition the host controller may hang forever until a reset is applied. Block the SCSI layer from sending new requests and apply the reset in a separate error handling work. o SCSI is informed about the expected Unit-Attention exception from the device for the immediate command after a reset so that the SCSI layer takes the necessary steps to establish communication with the device. Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org> Reviewed-by: Yaniv Gardi <ygardi@codeaurora.org> Tested-by: Dolev Raviv <draviv@codeaurora.org> Acked-by: Vinayak Holikatti <vinholikatti@gmail.com> Signed-off-by: Christoph Hellwig <hch@lst.de> hifive-unleashed-5.1
parent
3441da7ddb
commit
e8e7f27139
|
@ -84,6 +84,14 @@ enum {
|
||||||
UFSHCD_EH_IN_PROGRESS = (1 << 0),
|
UFSHCD_EH_IN_PROGRESS = (1 << 0),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* UFSHCD UIC layer error flags */
|
||||||
|
enum {
|
||||||
|
UFSHCD_UIC_DL_PA_INIT_ERROR = (1 << 0), /* Data link layer error */
|
||||||
|
UFSHCD_UIC_NL_ERROR = (1 << 1), /* Network layer error */
|
||||||
|
UFSHCD_UIC_TL_ERROR = (1 << 2), /* Transport Layer error */
|
||||||
|
UFSHCD_UIC_DME_ERROR = (1 << 3), /* DME error */
|
||||||
|
};
|
||||||
|
|
||||||
/* Interrupt configuration options */
|
/* Interrupt configuration options */
|
||||||
enum {
|
enum {
|
||||||
UFSHCD_INT_DISABLE,
|
UFSHCD_INT_DISABLE,
|
||||||
|
@ -100,6 +108,8 @@ enum {
|
||||||
|
|
||||||
static void ufshcd_tmc_handler(struct ufs_hba *hba);
|
static void ufshcd_tmc_handler(struct ufs_hba *hba);
|
||||||
static void ufshcd_async_scan(void *data, async_cookie_t cookie);
|
static void ufshcd_async_scan(void *data, async_cookie_t cookie);
|
||||||
|
static int ufshcd_reset_and_restore(struct ufs_hba *hba);
|
||||||
|
static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ufshcd_wait_for_register - wait for register value to change
|
* ufshcd_wait_for_register - wait for register value to change
|
||||||
|
@ -1735,9 +1745,6 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hba->ufshcd_state == UFSHCD_STATE_RESET)
|
|
||||||
scsi_unblock_requests(hba->host);
|
|
||||||
|
|
||||||
out:
|
out:
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -1862,66 +1869,6 @@ static int ufshcd_verify_dev_init(struct ufs_hba *hba)
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* ufshcd_do_reset - reset the host controller
|
|
||||||
* @hba: per adapter instance
|
|
||||||
*
|
|
||||||
* Returns SUCCESS/FAILED
|
|
||||||
*/
|
|
||||||
static int ufshcd_do_reset(struct ufs_hba *hba)
|
|
||||||
{
|
|
||||||
struct ufshcd_lrb *lrbp;
|
|
||||||
unsigned long flags;
|
|
||||||
int tag;
|
|
||||||
|
|
||||||
/* block commands from midlayer */
|
|
||||||
scsi_block_requests(hba->host);
|
|
||||||
|
|
||||||
spin_lock_irqsave(hba->host->host_lock, flags);
|
|
||||||
hba->ufshcd_state = UFSHCD_STATE_RESET;
|
|
||||||
|
|
||||||
/* send controller to reset state */
|
|
||||||
ufshcd_hba_stop(hba);
|
|
||||||
spin_unlock_irqrestore(hba->host->host_lock, flags);
|
|
||||||
|
|
||||||
/* abort outstanding commands */
|
|
||||||
for (tag = 0; tag < hba->nutrs; tag++) {
|
|
||||||
if (test_bit(tag, &hba->outstanding_reqs)) {
|
|
||||||
lrbp = &hba->lrb[tag];
|
|
||||||
if (lrbp->cmd) {
|
|
||||||
scsi_dma_unmap(lrbp->cmd);
|
|
||||||
lrbp->cmd->result = DID_RESET << 16;
|
|
||||||
lrbp->cmd->scsi_done(lrbp->cmd);
|
|
||||||
lrbp->cmd = NULL;
|
|
||||||
clear_bit_unlock(tag, &hba->lrb_in_use);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* complete device management command */
|
|
||||||
if (hba->dev_cmd.complete)
|
|
||||||
complete(hba->dev_cmd.complete);
|
|
||||||
|
|
||||||
/* clear outstanding request/task bit maps */
|
|
||||||
hba->outstanding_reqs = 0;
|
|
||||||
hba->outstanding_tasks = 0;
|
|
||||||
|
|
||||||
/* Host controller enable */
|
|
||||||
if (ufshcd_hba_enable(hba)) {
|
|
||||||
dev_err(hba->dev,
|
|
||||||
"Reset: Controller initialization failed\n");
|
|
||||||
return FAILED;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ufshcd_link_startup(hba)) {
|
|
||||||
dev_err(hba->dev,
|
|
||||||
"Reset: Link start-up failed\n");
|
|
||||||
return FAILED;
|
|
||||||
}
|
|
||||||
|
|
||||||
return SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ufshcd_slave_alloc - handle initial SCSI device configurations
|
* ufshcd_slave_alloc - handle initial SCSI device configurations
|
||||||
* @sdev: pointer to SCSI device
|
* @sdev: pointer to SCSI device
|
||||||
|
@ -1939,6 +1886,9 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev)
|
||||||
sdev->use_10_for_ms = 1;
|
sdev->use_10_for_ms = 1;
|
||||||
scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
|
scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
|
||||||
|
|
||||||
|
/* allow SCSI layer to restart the device in case of errors */
|
||||||
|
sdev->allow_restart = 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Inform SCSI Midlayer that the LUN queue depth is same as the
|
* Inform SCSI Midlayer that the LUN queue depth is same as the
|
||||||
* controller queue depth. If a LUN queue depth is less than the
|
* controller queue depth. If a LUN queue depth is less than the
|
||||||
|
@ -2134,6 +2084,9 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
|
||||||
case OCS_ABORTED:
|
case OCS_ABORTED:
|
||||||
result |= DID_ABORT << 16;
|
result |= DID_ABORT << 16;
|
||||||
break;
|
break;
|
||||||
|
case OCS_INVALID_COMMAND_STATUS:
|
||||||
|
result |= DID_REQUEUE << 16;
|
||||||
|
break;
|
||||||
case OCS_INVALID_CMD_TABLE_ATTR:
|
case OCS_INVALID_CMD_TABLE_ATTR:
|
||||||
case OCS_INVALID_PRDT_ATTR:
|
case OCS_INVALID_PRDT_ATTR:
|
||||||
case OCS_MISMATCH_DATA_BUF_SIZE:
|
case OCS_MISMATCH_DATA_BUF_SIZE:
|
||||||
|
@ -2451,45 +2404,145 @@ out:
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ufshcd_fatal_err_handler - handle fatal errors
|
* ufshcd_err_handler - handle UFS errors that require s/w attention
|
||||||
* @hba: per adapter instance
|
* @work: pointer to work structure
|
||||||
*/
|
*/
|
||||||
static void ufshcd_fatal_err_handler(struct work_struct *work)
|
static void ufshcd_err_handler(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct ufs_hba *hba;
|
struct ufs_hba *hba;
|
||||||
hba = container_of(work, struct ufs_hba, feh_workq);
|
unsigned long flags;
|
||||||
|
u32 err_xfer = 0;
|
||||||
|
u32 err_tm = 0;
|
||||||
|
int err = 0;
|
||||||
|
int tag;
|
||||||
|
|
||||||
|
hba = container_of(work, struct ufs_hba, eh_work);
|
||||||
|
|
||||||
pm_runtime_get_sync(hba->dev);
|
pm_runtime_get_sync(hba->dev);
|
||||||
/* check if reset is already in progress */
|
|
||||||
if (hba->ufshcd_state != UFSHCD_STATE_RESET)
|
spin_lock_irqsave(hba->host->host_lock, flags);
|
||||||
ufshcd_do_reset(hba);
|
if (hba->ufshcd_state == UFSHCD_STATE_RESET) {
|
||||||
|
spin_unlock_irqrestore(hba->host->host_lock, flags);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
hba->ufshcd_state = UFSHCD_STATE_RESET;
|
||||||
|
ufshcd_set_eh_in_progress(hba);
|
||||||
|
|
||||||
|
/* Complete requests that have door-bell cleared by h/w */
|
||||||
|
ufshcd_transfer_req_compl(hba);
|
||||||
|
ufshcd_tmc_handler(hba);
|
||||||
|
spin_unlock_irqrestore(hba->host->host_lock, flags);
|
||||||
|
|
||||||
|
/* Clear pending transfer requests */
|
||||||
|
for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs)
|
||||||
|
if (ufshcd_clear_cmd(hba, tag))
|
||||||
|
err_xfer |= 1 << tag;
|
||||||
|
|
||||||
|
/* Clear pending task management requests */
|
||||||
|
for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs)
|
||||||
|
if (ufshcd_clear_tm_cmd(hba, tag))
|
||||||
|
err_tm |= 1 << tag;
|
||||||
|
|
||||||
|
/* Complete the requests that are cleared by s/w */
|
||||||
|
spin_lock_irqsave(hba->host->host_lock, flags);
|
||||||
|
ufshcd_transfer_req_compl(hba);
|
||||||
|
ufshcd_tmc_handler(hba);
|
||||||
|
spin_unlock_irqrestore(hba->host->host_lock, flags);
|
||||||
|
|
||||||
|
/* Fatal errors need reset */
|
||||||
|
if (err_xfer || err_tm || (hba->saved_err & INT_FATAL_ERRORS) ||
|
||||||
|
((hba->saved_err & UIC_ERROR) &&
|
||||||
|
(hba->saved_uic_err & UFSHCD_UIC_DL_PA_INIT_ERROR))) {
|
||||||
|
err = ufshcd_reset_and_restore(hba);
|
||||||
|
if (err) {
|
||||||
|
dev_err(hba->dev, "%s: reset and restore failed\n",
|
||||||
|
__func__);
|
||||||
|
hba->ufshcd_state = UFSHCD_STATE_ERROR;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Inform scsi mid-layer that we did reset and allow to handle
|
||||||
|
* Unit Attention properly.
|
||||||
|
*/
|
||||||
|
scsi_report_bus_reset(hba->host, 0);
|
||||||
|
hba->saved_err = 0;
|
||||||
|
hba->saved_uic_err = 0;
|
||||||
|
}
|
||||||
|
ufshcd_clear_eh_in_progress(hba);
|
||||||
|
|
||||||
|
out:
|
||||||
|
scsi_unblock_requests(hba->host);
|
||||||
pm_runtime_put_sync(hba->dev);
|
pm_runtime_put_sync(hba->dev);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ufshcd_err_handler - Check for fatal errors
|
* ufshcd_update_uic_error - check and set fatal UIC error flags.
|
||||||
* @work: pointer to a work queue structure
|
* @hba: per-adapter instance
|
||||||
*/
|
*/
|
||||||
static void ufshcd_err_handler(struct ufs_hba *hba)
|
static void ufshcd_update_uic_error(struct ufs_hba *hba)
|
||||||
{
|
{
|
||||||
u32 reg;
|
u32 reg;
|
||||||
|
|
||||||
|
/* PA_INIT_ERROR is fatal and needs UIC reset */
|
||||||
|
reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
|
||||||
|
if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
|
||||||
|
hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR;
|
||||||
|
|
||||||
|
/* UIC NL/TL/DME errors needs software retry */
|
||||||
|
reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER);
|
||||||
|
if (reg)
|
||||||
|
hba->uic_error |= UFSHCD_UIC_NL_ERROR;
|
||||||
|
|
||||||
|
reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_TRANSPORT_LAYER);
|
||||||
|
if (reg)
|
||||||
|
hba->uic_error |= UFSHCD_UIC_TL_ERROR;
|
||||||
|
|
||||||
|
reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DME);
|
||||||
|
if (reg)
|
||||||
|
hba->uic_error |= UFSHCD_UIC_DME_ERROR;
|
||||||
|
|
||||||
|
dev_dbg(hba->dev, "%s: UIC error flags = 0x%08x\n",
|
||||||
|
__func__, hba->uic_error);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ufshcd_check_errors - Check for errors that need s/w attention
|
||||||
|
* @hba: per-adapter instance
|
||||||
|
*/
|
||||||
|
static void ufshcd_check_errors(struct ufs_hba *hba)
|
||||||
|
{
|
||||||
|
bool queue_eh_work = false;
|
||||||
|
|
||||||
if (hba->errors & INT_FATAL_ERRORS)
|
if (hba->errors & INT_FATAL_ERRORS)
|
||||||
goto fatal_eh;
|
queue_eh_work = true;
|
||||||
|
|
||||||
if (hba->errors & UIC_ERROR) {
|
if (hba->errors & UIC_ERROR) {
|
||||||
reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
|
hba->uic_error = 0;
|
||||||
if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
|
ufshcd_update_uic_error(hba);
|
||||||
goto fatal_eh;
|
if (hba->uic_error)
|
||||||
|
queue_eh_work = true;
|
||||||
}
|
}
|
||||||
return;
|
|
||||||
fatal_eh:
|
if (queue_eh_work) {
|
||||||
/* handle fatal errors only when link is functional */
|
/* handle fatal errors only when link is functional */
|
||||||
if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
|
if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
|
||||||
/* block commands at driver layer until error is handled */
|
/* block commands from scsi mid-layer */
|
||||||
hba->ufshcd_state = UFSHCD_STATE_ERROR;
|
scsi_block_requests(hba->host);
|
||||||
schedule_work(&hba->feh_workq);
|
|
||||||
|
/* transfer error masks to sticky bits */
|
||||||
|
hba->saved_err |= hba->errors;
|
||||||
|
hba->saved_uic_err |= hba->uic_error;
|
||||||
|
|
||||||
|
hba->ufshcd_state = UFSHCD_STATE_ERROR;
|
||||||
|
schedule_work(&hba->eh_work);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
* if (!queue_eh_work) -
|
||||||
|
* Other errors are either non-fatal where host recovers
|
||||||
|
* itself without s/w intervention or errors that will be
|
||||||
|
* handled by the SCSI core layer.
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -2514,7 +2567,7 @@ static void ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status)
|
||||||
{
|
{
|
||||||
hba->errors = UFSHCD_ERROR_MASK & intr_status;
|
hba->errors = UFSHCD_ERROR_MASK & intr_status;
|
||||||
if (hba->errors)
|
if (hba->errors)
|
||||||
ufshcd_err_handler(hba);
|
ufshcd_check_errors(hba);
|
||||||
|
|
||||||
if (intr_status & UFSHCD_UIC_MASK)
|
if (intr_status & UFSHCD_UIC_MASK)
|
||||||
ufshcd_uic_cmd_compl(hba, intr_status);
|
ufshcd_uic_cmd_compl(hba, intr_status);
|
||||||
|
@ -2889,12 +2942,12 @@ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
|
||||||
*/
|
*/
|
||||||
do {
|
do {
|
||||||
spin_lock_irqsave(hba->host->host_lock, flags);
|
spin_lock_irqsave(hba->host->host_lock, flags);
|
||||||
if (!(work_pending(&hba->feh_workq) ||
|
if (!(work_pending(&hba->eh_work) ||
|
||||||
hba->ufshcd_state == UFSHCD_STATE_RESET))
|
hba->ufshcd_state == UFSHCD_STATE_RESET))
|
||||||
break;
|
break;
|
||||||
spin_unlock_irqrestore(hba->host->host_lock, flags);
|
spin_unlock_irqrestore(hba->host->host_lock, flags);
|
||||||
dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
|
dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
|
||||||
flush_work(&hba->feh_workq);
|
flush_work(&hba->eh_work);
|
||||||
} while (1);
|
} while (1);
|
||||||
|
|
||||||
hba->ufshcd_state = UFSHCD_STATE_RESET;
|
hba->ufshcd_state = UFSHCD_STATE_RESET;
|
||||||
|
@ -3130,7 +3183,7 @@ int ufshcd_init(struct device *dev, struct ufs_hba **hba_handle,
|
||||||
init_waitqueue_head(&hba->tm_tag_wq);
|
init_waitqueue_head(&hba->tm_tag_wq);
|
||||||
|
|
||||||
/* Initialize work queues */
|
/* Initialize work queues */
|
||||||
INIT_WORK(&hba->feh_workq, ufshcd_fatal_err_handler);
|
INIT_WORK(&hba->eh_work, ufshcd_err_handler);
|
||||||
INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler);
|
INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler);
|
||||||
|
|
||||||
/* Initialize UIC command mutex */
|
/* Initialize UIC command mutex */
|
||||||
|
|
|
@ -183,9 +183,12 @@ struct ufs_dev_cmd {
|
||||||
* @eh_flags: Error handling flags
|
* @eh_flags: Error handling flags
|
||||||
* @intr_mask: Interrupt Mask Bits
|
* @intr_mask: Interrupt Mask Bits
|
||||||
* @ee_ctrl_mask: Exception event control mask
|
* @ee_ctrl_mask: Exception event control mask
|
||||||
* @feh_workq: Work queue for fatal controller error handling
|
* @eh_work: Worker to handle UFS errors that require s/w attention
|
||||||
* @eeh_work: Worker to handle exception events
|
* @eeh_work: Worker to handle exception events
|
||||||
* @errors: HBA errors
|
* @errors: HBA errors
|
||||||
|
* @uic_error: UFS interconnect layer error status
|
||||||
|
* @saved_err: sticky error mask
|
||||||
|
* @saved_uic_err: sticky UIC error mask
|
||||||
* @dev_cmd: ufs device management command information
|
* @dev_cmd: ufs device management command information
|
||||||
* @auto_bkops_enabled: to track whether bkops is enabled in device
|
* @auto_bkops_enabled: to track whether bkops is enabled in device
|
||||||
*/
|
*/
|
||||||
|
@ -233,11 +236,14 @@ struct ufs_hba {
|
||||||
u16 ee_ctrl_mask;
|
u16 ee_ctrl_mask;
|
||||||
|
|
||||||
/* Work Queues */
|
/* Work Queues */
|
||||||
struct work_struct feh_workq;
|
struct work_struct eh_work;
|
||||||
struct work_struct eeh_work;
|
struct work_struct eeh_work;
|
||||||
|
|
||||||
/* HBA Errors */
|
/* HBA Errors */
|
||||||
u32 errors;
|
u32 errors;
|
||||||
|
u32 uic_error;
|
||||||
|
u32 saved_err;
|
||||||
|
u32 saved_uic_err;
|
||||||
|
|
||||||
/* Device management request data */
|
/* Device management request data */
|
||||||
struct ufs_dev_cmd dev_cmd;
|
struct ufs_dev_cmd dev_cmd;
|
||||||
|
|
Loading…
Reference in New Issue