1
0
Fork 0

Merge branch 'qed-Error-recovery-process'

Michal Kalderon says:

====================
qed*: Error recovery process

Parity errors might happen in the device's memories due to momentary bit
flips which are caused by radiation.
Errors that are not correctable initiate a process kill event, which blocks
the device access towards the host and the network, and a recovery process
is started in the management FW and in the driver.

This series adds support for this process to the qed core module and to
the qede driver (patches 2 & 3).
Patch 1 in the series revises the load sequence, to avoid PCI errors that
might be observed during a recovery process.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
hifive-unleashed-5.1
David S. Miller 2019-01-22 17:30:39 -08:00
commit 8fb18be93e
18 changed files with 745 additions and 204 deletions

View File

@ -554,7 +554,6 @@ struct qed_hwfn {
u8 dp_level;
char name[NAME_SIZE];
bool first_on_engine;
bool hw_init_done;
u8 num_funcs_on_engine;
@ -805,6 +804,9 @@ struct qed_dev {
u32 mcp_nvm_resp;
/* Recovery */
bool recov_in_prog;
/* Linux specific here */
struct qede_dev *edev;
struct pci_dev *pdev;
@ -944,6 +946,7 @@ void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt);
u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
u32 input_len, u8 *input_buf,
u32 max_size, u8 *unzip_buf);
void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
void qed_get_protocol_stats(struct qed_dev *cdev,
enum qed_mcp_protocol_type type,
union qed_mcp_protocol_stats *stats);

View File

@ -1959,11 +1959,6 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
(p_hwfn->hw_info.personality == QED_PCI_FCOE) ? 1 : 0);
STORE_RT_REG(p_hwfn, PRS_REG_SEARCH_ROCE_RT_OFFSET, 0);
/* Cleanup chip from previous driver if such remains exist */
rc = qed_final_cleanup(p_hwfn, p_ptt, rel_pf_id, false);
if (rc)
return rc;
/* Sanity check before the PF init sequence that uses DMAE */
rc = qed_dmae_sanity(p_hwfn, p_ptt, "pf_phase");
if (rc)
@ -2007,17 +2002,15 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
return rc;
}
static int qed_change_pci_hwfn(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
u8 enable)
int qed_pglueb_set_pfid_enable(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, bool b_enable)
{
u32 delay_idx = 0, val, set_val = enable ? 1 : 0;
u32 delay_idx = 0, val, set_val = b_enable ? 1 : 0;
/* Change PF in PXP */
qed_wr(p_hwfn, p_ptt,
PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, set_val);
/* Configure the PF's internal FID_enable for master transactions */
qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, set_val);
/* wait until value is set - try for 1 second every 50us */
/* Wait until value is set - try for 1 second every 50us */
for (delay_idx = 0; delay_idx < 20000; delay_idx++) {
val = qed_rd(p_hwfn, p_ptt,
PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER);
@ -2071,13 +2064,19 @@ static int qed_vf_start(struct qed_hwfn *p_hwfn,
return 0;
}
/* Clear the pglue_b "was_error" indication of this PF by writing the bit
 * that corresponds to its absolute PF id to the clear register.
 */
static void qed_pglueb_clear_err(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
	const unsigned long pf_bit = BIT(p_hwfn->abs_pf_id);

	qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, pf_bit);
}
int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
{
struct qed_load_req_params load_req_params;
u32 load_code, resp, param, drv_mb_param;
bool b_default_mtu = true;
struct qed_hwfn *p_hwfn;
int rc = 0, mfw_rc, i;
int rc = 0, i;
u16 ether_type;
if ((p_params->int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) {
@ -2092,7 +2091,7 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
}
for_each_hwfn(cdev, i) {
struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
p_hwfn = &cdev->hwfns[i];
/* If management didn't provide a default, set one of our own */
if (!p_hwfn->hw_info.mtu) {
@ -2105,9 +2104,6 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
continue;
}
/* Enable DMAE in PXP */
rc = qed_change_pci_hwfn(p_hwfn, p_hwfn->p_main_ptt, true);
rc = qed_calc_hw_mode(p_hwfn);
if (rc)
return rc;
@ -2144,12 +2140,43 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
"Load request was sent. Load code: 0x%x\n",
load_code);
/* Only relevant for recovery:
* Clear the indication after LOAD_REQ is responded by the MFW.
*/
cdev->recov_in_prog = false;
qed_mcp_set_capabilities(p_hwfn, p_hwfn->p_main_ptt);
qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt);
p_hwfn->first_on_engine = (load_code ==
FW_MSG_CODE_DRV_LOAD_ENGINE);
/* Clean up chip from previous driver if such remains exist.
* This is not needed when the PF is the first one on the
* engine, since afterwards we are going to init the FW.
*/
if (load_code != FW_MSG_CODE_DRV_LOAD_ENGINE) {
rc = qed_final_cleanup(p_hwfn, p_hwfn->p_main_ptt,
p_hwfn->rel_pf_id, false);
if (rc) {
DP_NOTICE(p_hwfn, "Final cleanup failed\n");
goto load_err;
}
}
/* Log and clear previous pglue_b errors if such exist */
qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_main_ptt);
/* Enable the PF's internal FID_enable in the PXP */
rc = qed_pglueb_set_pfid_enable(p_hwfn, p_hwfn->p_main_ptt,
true);
if (rc)
goto load_err;
/* Clear the pglue_b was_error indication.
* In E4 it must be done after the BME and the internal
* FID_enable for the PF are set, since VDMs may cause the
* indication to be set again.
*/
qed_pglueb_clear_err(p_hwfn, p_hwfn->p_main_ptt);
switch (load_code) {
case FW_MSG_CODE_DRV_LOAD_ENGINE:
@ -2180,39 +2207,29 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
break;
}
if (rc)
if (rc) {
DP_NOTICE(p_hwfn,
"init phase failed for loadcode 0x%x (rc %d)\n",
load_code, rc);
/* ACK mfw regardless of success or failure of initialization */
mfw_rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
DRV_MSG_CODE_LOAD_DONE,
0, &load_code, &param);
if (rc)
return rc;
if (mfw_rc) {
DP_NOTICE(p_hwfn, "Failed sending LOAD_DONE command\n");
return mfw_rc;
load_code, rc);
goto load_err;
}
/* Check if there is a DID mismatch between nvm-cfg/efuse */
if (param & FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR)
DP_NOTICE(p_hwfn,
"warning: device configuration is not supported on this board type. The device may not function as expected.\n");
rc = qed_mcp_load_done(p_hwfn, p_hwfn->p_main_ptt);
if (rc)
return rc;
/* send DCBX attention request command */
DP_VERBOSE(p_hwfn,
QED_MSG_DCB,
"sending phony dcbx set command to trigger DCBx attention handling\n");
mfw_rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
DRV_MSG_CODE_SET_DCBX,
1 << DRV_MB_PARAM_DCBX_NOTIFY_SHIFT,
&load_code, &param);
if (mfw_rc) {
rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
DRV_MSG_CODE_SET_DCBX,
1 << DRV_MB_PARAM_DCBX_NOTIFY_SHIFT,
&resp, &param);
if (rc) {
DP_NOTICE(p_hwfn,
"Failed to send DCBX attention request\n");
return mfw_rc;
return rc;
}
p_hwfn->hw_init_done = true;
@ -2261,6 +2278,12 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
}
return 0;
load_err:
/* The MFW load lock should be released also when initialization fails.
*/
qed_mcp_load_done(p_hwfn, p_hwfn->p_main_ptt);
return rc;
}
#define QED_HW_STOP_RETRY_LIMIT (10)
@ -2273,6 +2296,9 @@ static void qed_hw_timers_stop(struct qed_dev *cdev,
qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_CONN, 0x0);
qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_TASK, 0x0);
if (cdev->recov_in_prog)
return;
for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) {
if ((!qed_rd(p_hwfn, p_ptt,
TM_REG_PF_SCAN_ACTIVE_CONN)) &&
@ -2335,12 +2361,14 @@ int qed_hw_stop(struct qed_dev *cdev)
p_hwfn->hw_init_done = false;
/* Send unload command to MCP */
rc = qed_mcp_unload_req(p_hwfn, p_ptt);
if (rc) {
DP_NOTICE(p_hwfn,
"Failed sending a UNLOAD_REQ command. rc = %d.\n",
rc);
rc2 = -EINVAL;
if (!cdev->recov_in_prog) {
rc = qed_mcp_unload_req(p_hwfn, p_ptt);
if (rc) {
DP_NOTICE(p_hwfn,
"Failed sending a UNLOAD_REQ command. rc = %d.\n",
rc);
rc2 = -EINVAL;
}
}
qed_slowpath_irq_sync(p_hwfn);
@ -2382,27 +2410,31 @@ int qed_hw_stop(struct qed_dev *cdev)
qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0);
qed_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0);
qed_mcp_unload_done(p_hwfn, p_ptt);
if (rc) {
DP_NOTICE(p_hwfn,
"Failed sending a UNLOAD_DONE command. rc = %d.\n",
rc);
rc2 = -EINVAL;
if (!cdev->recov_in_prog) {
rc = qed_mcp_unload_done(p_hwfn, p_ptt);
if (rc) {
DP_NOTICE(p_hwfn,
"Failed sending a UNLOAD_DONE command. rc = %d.\n",
rc);
rc2 = -EINVAL;
}
}
}
if (IS_PF(cdev)) {
if (IS_PF(cdev) && !cdev->recov_in_prog) {
p_hwfn = QED_LEADING_HWFN(cdev);
p_ptt = QED_LEADING_HWFN(cdev)->p_main_ptt;
/* Disable DMAE in PXP - in CMT, this should only be done for
* first hw-function, and only after all transactions have
* stopped for all active hw-functions.
/* Clear the PF's internal FID_enable in the PXP.
* In CMT this should only be done for first hw-function, and
* only after all transactions have stopped for all active
* hw-functions.
*/
rc = qed_change_pci_hwfn(p_hwfn, p_ptt, false);
rc = qed_pglueb_set_pfid_enable(p_hwfn, p_ptt, false);
if (rc) {
DP_NOTICE(p_hwfn,
"qed_change_pci_hwfn failed. rc = %d.\n", rc);
"qed_pglueb_set_pfid_enable() failed. rc = %d.\n",
rc);
rc2 = -EINVAL;
}
}
@ -2502,9 +2534,8 @@ static void qed_hw_hwfn_prepare(struct qed_hwfn *p_hwfn)
PGLUE_B_REG_PGL_ADDR_94_F0_BB, 0);
}
/* Clean Previous errors if such exist */
qed_wr(p_hwfn, p_hwfn->p_main_ptt,
PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, 1 << p_hwfn->abs_pf_id);
/* Clean previous pglue_b errors if such exist */
qed_pglueb_clear_err(p_hwfn, p_hwfn->p_main_ptt);
/* enable internal target-read */
qed_wr(p_hwfn, p_hwfn->p_main_ptt,
@ -3440,6 +3471,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
void __iomem *p_doorbells,
enum qed_pci_personality personality)
{
struct qed_dev *cdev = p_hwfn->cdev;
int rc = 0;
/* Split PCI bars evenly between hwfns */
@ -3492,7 +3524,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
/* Sending a mailbox to the MFW should be done after qed_get_hw_info()
* is called as it sets the ports number in an engine.
*/
if (IS_LEAD_HWFN(p_hwfn)) {
if (IS_LEAD_HWFN(p_hwfn) && !cdev->recov_in_prog) {
rc = qed_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt);
if (rc)
DP_NOTICE(p_hwfn, "Failed to initiate PF FLR\n");

View File

@ -472,6 +472,18 @@ int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *coal, void *handle);
int
qed_set_queue_coalesce(u16 rx_coal, u16 tx_coal, void *p_handle);
/**
* @brief qed_pglueb_set_pfid_enable - Enable or disable PCI BUS MASTER
*
* @param p_hwfn
* @param p_ptt
* @param b_enable - true/false
*
* @return int
*/
int qed_pglueb_set_pfid_enable(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, bool b_enable);
/**
* @brief db_recovery_add - add doorbell information to the doorbell
* recovery mechanism.

View File

@ -12827,7 +12827,7 @@ enum MFW_DRV_MSG_TYPE {
MFW_DRV_MSG_LLDP_DATA_UPDATED,
MFW_DRV_MSG_DCBX_REMOTE_MIB_UPDATED,
MFW_DRV_MSG_DCBX_OPERATIONAL_MIB_UPDATED,
MFW_DRV_MSG_RESERVED4,
MFW_DRV_MSG_ERROR_RECOVERY,
MFW_DRV_MSG_BW_UPDATE,
MFW_DRV_MSG_S_TAG_UPDATE,
MFW_DRV_MSG_GET_LAN_STATS,

View File

@ -703,6 +703,17 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
int qed_status = 0;
u32 offset = 0;
if (p_hwfn->cdev->recov_in_prog) {
DP_VERBOSE(p_hwfn,
NETIF_MSG_HW,
"Recovery is in progress. Avoid DMAE transaction [{src: addr 0x%llx, type %d}, {dst: addr 0x%llx, type %d}, size %d].\n",
src_addr, src_type, dst_addr, dst_type,
size_in_dwords);
/* Let the flow complete w/o any error handling */
return 0;
}
qed_dmae_opcode(p_hwfn,
(src_type == QED_DMAE_ADDRESS_GRC),
(dst_type == QED_DMAE_ADDRESS_GRC),

View File

@ -255,112 +255,114 @@ out:
#define PGLUE_ATTENTION_ICPL_VALID (1 << 23)
#define PGLUE_ATTENTION_ZLR_VALID (1 << 25)
#define PGLUE_ATTENTION_ILT_VALID (1 << 23)
static int qed_pglub_rbc_attn_cb(struct qed_hwfn *p_hwfn)
int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt)
{
u32 tmp;
tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
PGLUE_B_REG_TX_ERR_WR_DETAILS2);
tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS2);
if (tmp & PGLUE_ATTENTION_VALID) {
u32 addr_lo, addr_hi, details;
addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
addr_lo = qed_rd(p_hwfn, p_ptt,
PGLUE_B_REG_TX_ERR_WR_ADD_31_0);
addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
addr_hi = qed_rd(p_hwfn, p_ptt,
PGLUE_B_REG_TX_ERR_WR_ADD_63_32);
details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
details = qed_rd(p_hwfn, p_ptt,
PGLUE_B_REG_TX_ERR_WR_DETAILS);
DP_INFO(p_hwfn,
"Illegal write by chip to [%08x:%08x] blocked.\n"
"Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n"
"Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
addr_hi, addr_lo, details,
(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID),
(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID),
GET_FIELD(details,
PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0,
tmp,
GET_FIELD(tmp,
PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 : 0,
GET_FIELD(tmp,
PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0,
GET_FIELD(tmp,
PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 : 0);
DP_NOTICE(p_hwfn,
"Illegal write by chip to [%08x:%08x] blocked.\n"
"Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n"
"Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
addr_hi, addr_lo, details,
(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID),
(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID),
GET_FIELD(details,
PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0,
tmp,
GET_FIELD(tmp,
PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 : 0,
GET_FIELD(tmp,
PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0,
GET_FIELD(tmp,
PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 : 0);
}
tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
PGLUE_B_REG_TX_ERR_RD_DETAILS2);
tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_RD_DETAILS2);
if (tmp & PGLUE_ATTENTION_RD_VALID) {
u32 addr_lo, addr_hi, details;
addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
addr_lo = qed_rd(p_hwfn, p_ptt,
PGLUE_B_REG_TX_ERR_RD_ADD_31_0);
addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
addr_hi = qed_rd(p_hwfn, p_ptt,
PGLUE_B_REG_TX_ERR_RD_ADD_63_32);
details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
details = qed_rd(p_hwfn, p_ptt,
PGLUE_B_REG_TX_ERR_RD_DETAILS);
DP_INFO(p_hwfn,
"Illegal read by chip from [%08x:%08x] blocked.\n"
" Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n"
" Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
addr_hi, addr_lo, details,
(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID),
(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID),
GET_FIELD(details,
PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0,
tmp,
GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1
: 0,
GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0,
GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1
: 0);
DP_NOTICE(p_hwfn,
"Illegal read by chip from [%08x:%08x] blocked.\n"
"Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n"
"Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n",
addr_hi, addr_lo, details,
(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID),
(u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID),
GET_FIELD(details,
PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0,
tmp,
GET_FIELD(tmp,
PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 : 0,
GET_FIELD(tmp,
PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0,
GET_FIELD(tmp,
PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 : 0);
}
tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
PGLUE_B_REG_TX_ERR_WR_DETAILS_ICPL);
tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS_ICPL);
if (tmp & PGLUE_ATTENTION_ICPL_VALID)
DP_INFO(p_hwfn, "ICPL error - %08x\n", tmp);
DP_NOTICE(p_hwfn, "ICPL error - %08x\n", tmp);
tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
PGLUE_B_REG_MASTER_ZLR_ERR_DETAILS);
tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_MASTER_ZLR_ERR_DETAILS);
if (tmp & PGLUE_ATTENTION_ZLR_VALID) {
u32 addr_hi, addr_lo;
addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
addr_lo = qed_rd(p_hwfn, p_ptt,
PGLUE_B_REG_MASTER_ZLR_ERR_ADD_31_0);
addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
addr_hi = qed_rd(p_hwfn, p_ptt,
PGLUE_B_REG_MASTER_ZLR_ERR_ADD_63_32);
DP_INFO(p_hwfn, "ZLR eror - %08x [Address %08x:%08x]\n",
tmp, addr_hi, addr_lo);
DP_NOTICE(p_hwfn, "ZLR error - %08x [Address %08x:%08x]\n",
tmp, addr_hi, addr_lo);
}
tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
PGLUE_B_REG_VF_ILT_ERR_DETAILS2);
tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_VF_ILT_ERR_DETAILS2);
if (tmp & PGLUE_ATTENTION_ILT_VALID) {
u32 addr_hi, addr_lo, details;
addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
addr_lo = qed_rd(p_hwfn, p_ptt,
PGLUE_B_REG_VF_ILT_ERR_ADD_31_0);
addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
addr_hi = qed_rd(p_hwfn, p_ptt,
PGLUE_B_REG_VF_ILT_ERR_ADD_63_32);
details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
details = qed_rd(p_hwfn, p_ptt,
PGLUE_B_REG_VF_ILT_ERR_DETAILS);
DP_INFO(p_hwfn,
"ILT error - Details %08x Details2 %08x [Address %08x:%08x]\n",
details, tmp, addr_hi, addr_lo);
DP_NOTICE(p_hwfn,
"ILT error - Details %08x Details2 %08x [Address %08x:%08x]\n",
details, tmp, addr_hi, addr_lo);
}
/* Clear the indications */
qed_wr(p_hwfn, p_hwfn->p_dpc_ptt,
PGLUE_B_REG_LATCHED_ERRORS_CLR, (1 << 2));
qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_LATCHED_ERRORS_CLR, BIT(2));
return 0;
}
/* Attention callback for the "PGLUE B RBC" entry: runs the common pglue_b
 * attention handler using the hwfn's DPC PTT window.
 */
static int qed_pglueb_rbc_attn_cb(struct qed_hwfn *p_hwfn)
{
return qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt);
}
#define QED_DORQ_ATTENTION_REASON_MASK (0xfffff)
#define QED_DORQ_ATTENTION_OPAQUE_MASK (0xffff)
#define QED_DORQ_ATTENTION_OPAQUE_SHIFT (0x0)
@ -540,7 +542,7 @@ static struct aeu_invert_reg aeu_descs[NUM_ATTN_REGS] = {
{"PGLUE misc_flr", ATTENTION_SINGLE,
NULL, MAX_BLOCK_ID},
{"PGLUE B RBC", ATTENTION_PAR_INT,
qed_pglub_rbc_attn_cb, BLOCK_PGLUE_B},
qed_pglueb_rbc_attn_cb, BLOCK_PGLUE_B},
{"PGLUE misc_mctp", ATTENTION_SINGLE,
NULL, MAX_BLOCK_ID},
{"Flash event", ATTENTION_SINGLE, NULL, MAX_BLOCK_ID},

View File

@ -431,4 +431,7 @@ int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
#define QED_MAPPING_MEMORY_SIZE(dev) (NUM_OF_SBS(dev))
int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt);
#endif

View File

@ -359,6 +359,8 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev,
qed_init_dp(cdev, params->dp_module, params->dp_level);
cdev->recov_in_prog = params->recov_in_prog;
rc = qed_init_pci(cdev, pdev);
if (rc) {
DP_ERR(cdev, "init pci failed\n");
@ -2203,6 +2205,15 @@ static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type,
return qed_mcp_get_nvm_image(hwfn, type, buf, len);
}
/* Ask the protocol driver to schedule its recovery handler.
 *
 * The request is forwarded through the registered common callback ops and
 * is silently dropped when no ops, or no schedule_recovery_handler callback,
 * are registered.
 */
void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
{
	struct qed_common_cb_ops *cb_ops = p_hwfn->cdev->protocol_ops.common;

	/* Nothing to notify if the callback isn't available */
	if (!cb_ops || !cb_ops->schedule_recovery_handler)
		return;

	cb_ops->schedule_recovery_handler(p_hwfn->cdev->ops_cookie);
}
static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
void *handle)
{
@ -2226,6 +2237,23 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode)
return status;
}
/* Trigger a recovery process through the leading hw-function.
 *
 * Returns -EAGAIN when no PTT window could be acquired; otherwise returns
 * the result of qed_start_recovery_process().
 */
static int qed_recovery_process(struct qed_dev *cdev)
{
	struct qed_hwfn *lead_hwfn = QED_LEADING_HWFN(cdev);
	struct qed_ptt *ptt = qed_ptt_acquire(lead_hwfn);
	int status;

	if (!ptt)
		return -EAGAIN;

	status = qed_start_recovery_process(lead_hwfn, ptt);

	/* The PTT is released regardless of the trigger result */
	qed_ptt_release(lead_hwfn, ptt);

	return status;
}
static int qed_update_wol(struct qed_dev *cdev, bool enabled)
{
struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
@ -2380,6 +2408,8 @@ const struct qed_common_ops qed_common_ops_pass = {
.nvm_get_image = &qed_nvm_get_image,
.set_coalesce = &qed_set_coalesce,
.set_led = &qed_set_led,
.recovery_process = &qed_recovery_process,
.recovery_prolog = &qed_recovery_prolog,
.update_drv_state = &qed_update_drv_state,
.update_mac = &qed_update_mac,
.update_mtu = &qed_update_mtu,

View File

@ -1070,6 +1070,27 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
return 0;
}
/* Send a LOAD_DONE mailbox command to the MFW.
 *
 * Returns 0 when the command was sent, otherwise the error returned by
 * qed_mcp_cmd(). A DID mismatch reported by the MFW is only logged and does
 * not fail the call.
 */
int qed_mcp_load_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
	u32 mfw_resp = 0, mfw_param = 0;
	int rc;

	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_LOAD_DONE, 0,
			 &mfw_resp, &mfw_param);
	if (!rc) {
		/* Check if there is a DID mismatch between nvm-cfg/efuse */
		if (mfw_param & FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR)
			DP_NOTICE(p_hwfn,
				  "warning: device configuration is not supported on this board type. The device may not function as expected.\n");

		return 0;
	}

	DP_NOTICE(p_hwfn,
		  "Failed to send a LOAD_DONE command, rc = %d\n", rc);

	return rc;
}
int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct qed_mcp_mb_params mb_params;
@ -1528,6 +1549,60 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up)
return 0;
}
/* Read the process kill counter from the "public path" section.
 *
 * The section address is resolved from the MCP public base, and the
 * process_kill field is masked with PROCESS_KILL_COUNTER_MASK.
 *
 * NOTE(review): for a VF this returns -EINVAL through an unsigned return
 * type, so the caller receives the errno converted to u32 rather than a
 * negative value - confirm that callers expect this.
 */
u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn,
				 struct qed_ptt *p_ptt)
{
	u32 offsize_addr, offsize, field_addr, raw_val;

	if (IS_VF(p_hwfn->cdev))
		return -EINVAL;

	/* Locate this path's entry inside the public path section */
	offsize_addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
					    PUBLIC_PATH);
	offsize = qed_rd(p_hwfn, p_ptt, offsize_addr);
	field_addr = SECTION_ADDR(offsize, QED_PATH_ID(p_hwfn));
	field_addr += offsetof(struct public_path, process_kill);

	raw_val = qed_rd(p_hwfn, p_ptt, field_addr);

	return raw_val & PROCESS_KILL_COUNTER_MASK;
}
static void qed_mcp_handle_process_kill(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt)
{
struct qed_dev *cdev = p_hwfn->cdev;
u32 proc_kill_cnt;
/* Prevent possible attentions/interrupts during the recovery handling
* and till its load phase, during which they will be re-enabled.
*/
qed_int_igu_disable_int(p_hwfn, p_ptt);
DP_NOTICE(p_hwfn, "Received a process kill indication\n");
/* The following operations should be done once, and thus in CMT mode
* are carried out by only the first HW function.
*/
if (p_hwfn != QED_LEADING_HWFN(cdev))
return;
if (cdev->recov_in_prog) {
DP_NOTICE(p_hwfn,
"Ignoring the indication since a recovery process is already in progress\n");
return;
}
cdev->recov_in_prog = true;
proc_kill_cnt = qed_get_process_kill_counter(p_hwfn, p_ptt);
DP_NOTICE(p_hwfn, "Process kill counter: %d\n", proc_kill_cnt);
qed_schedule_recovery_handler(p_hwfn);
}
static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
enum MFW_DRV_MSG_TYPE type)
@ -1758,6 +1833,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE:
qed_mcp_handle_transceiver_change(p_hwfn, p_ptt);
break;
case MFW_DRV_MSG_ERROR_RECOVERY:
qed_mcp_handle_process_kill(p_hwfn, p_ptt);
break;
case MFW_DRV_MSG_GET_LAN_STATS:
case MFW_DRV_MSG_GET_FCOE_STATS:
case MFW_DRV_MSG_GET_ISCSI_STATS:
@ -2303,6 +2381,43 @@ int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn,
return 0;
}
/* Trigger a recovery process by writing to MISC_REG_AEU_GENERAL_ATTN_35.
 *
 * Returns 0 once the trigger was written, or -EAGAIN when a recovery
 * process is already in progress (nothing is written in that case).
 */
int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
	if (!p_hwfn->cdev->recov_in_prog) {
		DP_NOTICE(p_hwfn, "Triggering a recovery process\n");
		qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_GENERAL_ATTN_35, 0x1);

		return 0;
	}

	DP_NOTICE(p_hwfn,
		  "Avoid triggering a recovery since such a process is already in progress\n");

	return -EAGAIN;
}
#define QED_RECOVERY_PROLOG_SLEEP_MS 100
/* First step of a recovery handler: sleep for
 * QED_RECOVERY_PROLOG_SLEEP_MS and then clear the internal FID_enable of
 * the leading hw-function's PF.
 *
 * Returns 0 on success, otherwise the error returned by
 * qed_pglueb_set_pfid_enable(). The function sleeps, so it must not be
 * called from a context that cannot sleep.
 */
int qed_recovery_prolog(struct qed_dev *cdev)
{
	struct qed_hwfn *lead_hwfn = QED_LEADING_HWFN(cdev);
	struct qed_ptt *main_ptt = lead_hwfn->p_main_ptt;
	int rc;

	/* Allow ongoing PCIe transactions to complete */
	msleep(QED_RECOVERY_PROLOG_SLEEP_MS);

	/* Clear the PF's internal FID_enable in the PXP */
	rc = qed_pglueb_set_pfid_enable(lead_hwfn, main_ptt, false);
	if (!rc)
		return 0;

	DP_NOTICE(lead_hwfn,
		  "qed_pglueb_set_pfid_enable() failed. rc = %d.\n",
		  rc);

	return rc;
}
static int
qed_mcp_config_vf_msix_bb(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, u8 vf_id, u8 num)

View File

@ -440,6 +440,38 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
struct qed_mcp_drv_version *p_ver);
/**
* @brief Read the MFW process kill counter
*
* @param p_hwfn
* @param p_ptt
*
* @return u32
*/
u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt);
/**
* @brief Trigger a recovery process
*
* @param p_hwfn
* @param p_ptt
*
* @return int
*/
int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
/**
 * @brief A recovery handler must call this function as its first step.
 *        It is assumed that the handler is not run from an interrupt context.
 *
 * @param cdev
 *
 * @return int
 */
int qed_recovery_prolog(struct qed_dev *cdev);
/**
* @brief Notify MFW about the change in base device properties
*
@ -800,6 +832,16 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
struct qed_load_req_params *p_params);
/**
* @brief Sends a LOAD_DONE message to the MFW
*
* @param p_hwfn
* @param p_ptt
*
* @return int - 0 - Operation was successful.
*/
int qed_mcp_load_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
/**
* @brief Sends a UNLOAD_REQ message to the MFW
*

View File

@ -518,6 +518,8 @@
0x180824UL
#define MISC_REG_AEU_GENERAL_ATTN_0 \
0x008400UL
#define MISC_REG_AEU_GENERAL_ATTN_35 \
0x00848cUL
#define CAU_REG_SB_ADDR_MEMORY \
0x1c8000UL
#define CAU_REG_SB_VAR_MEMORY \

View File

@ -790,6 +790,17 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn)
SPQ_HIGH_PRI_RESERVE_DEFAULT);
}
/* Fill in the FW return code for an SPQ entry whose posting is skipped.
 *
 * Only RoCE and iWARP entries get a value (RDMA_RETURN_OK); for any other
 * protocol, or when the caller passed no return-code pointer, nothing is
 * written.
 */
static void qed_spq_recov_set_ret_code(struct qed_spq_entry *p_ent,
				       u8 *fw_return_code)
{
	if (fw_return_code) {
		switch (p_ent->elem.hdr.protocol_id) {
		case PROTOCOLID_ROCE:
		case PROTOCOLID_IWARP:
			*fw_return_code = RDMA_RETURN_OK;
			break;
		default:
			/* Other protocols: leave the value untouched */
			break;
		}
	}
}
/* Avoid overriding of SPQ entries when getting out-of-order completions, by
* marking the completions in a bitmap and increasing the chain consumer only
* for the first successive completed entries.
@ -825,6 +836,17 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
return -EINVAL;
}
if (p_hwfn->cdev->recov_in_prog) {
DP_VERBOSE(p_hwfn,
QED_MSG_SPQ,
"Recovery is in progress. Skip spq post [cmd %02x protocol %02x]\n",
p_ent->elem.hdr.cmd_id, p_ent->elem.hdr.protocol_id);
/* Let the flow complete w/o any error handling */
qed_spq_recov_set_ret_code(p_ent, fw_return_code);
return 0;
}
/* Complete the entry */
rc = qed_spq_fill_entry(p_hwfn, p_ent);

View File

@ -4447,6 +4447,13 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
if (cdev->p_iov_info && cdev->p_iov_info->num_vfs && pci_enabled)
pci_disable_sriov(cdev->pdev);
if (cdev->recov_in_prog) {
DP_VERBOSE(cdev,
QED_MSG_IOV,
"Skip SRIOV disable operations in the device since a recovery is in progress\n");
goto out;
}
for_each_hwfn(cdev, i) {
struct qed_hwfn *hwfn = &cdev->hwfns[i];
struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
@ -4486,7 +4493,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
qed_ptt_release(hwfn, ptt);
}
out:
qed_iov_set_vfs_to_disable(cdev, false);
return 0;

View File

@ -162,6 +162,7 @@ struct qede_rdma_dev {
struct list_head entry;
struct list_head rdma_event_list;
struct workqueue_struct *rdma_wq;
bool exp_recovery;
};
struct qede_ptp;
@ -264,6 +265,7 @@ struct qede_dev {
enum QEDE_STATE {
QEDE_STATE_CLOSED,
QEDE_STATE_OPEN,
QEDE_STATE_RECOVERY,
};
#define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo))
@ -462,6 +464,7 @@ struct qede_fastpath {
#define QEDE_CSUM_UNNECESSARY BIT(1)
#define QEDE_TUNN_CSUM_UNNECESSARY BIT(2)
#define QEDE_SP_RECOVERY 0
#define QEDE_SP_RX_MODE 1
#ifdef CONFIG_RFS_ACCEL

View File

@ -133,23 +133,12 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
static void qede_remove(struct pci_dev *pdev);
static void qede_shutdown(struct pci_dev *pdev);
static void qede_link_update(void *dev, struct qed_link_output *link);
static void qede_schedule_recovery_handler(void *dev);
static void qede_recovery_handler(struct qede_dev *edev);
static void qede_get_eth_tlv_data(void *edev, void *data);
static void qede_get_generic_tlv_data(void *edev,
struct qed_generic_tlvs *data);
/* The qede lock is used to protect driver state change and driver flows that
* are not reentrant.
*/
void __qede_lock(struct qede_dev *edev)
{
mutex_lock(&edev->qede_lock);
}
void __qede_unlock(struct qede_dev *edev)
{
mutex_unlock(&edev->qede_lock);
}
#ifdef CONFIG_QED_SRIOV
static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
__be16 vlan_proto)
@ -231,6 +220,7 @@ static struct qed_eth_cb_ops qede_ll_ops = {
.arfs_filter_op = qede_arfs_filter_op,
#endif
.link_update = qede_link_update,
.schedule_recovery_handler = qede_schedule_recovery_handler,
.get_generic_tlv_data = qede_get_generic_tlv_data,
.get_protocol_tlv_data = qede_get_eth_tlv_data,
},
@ -950,11 +940,57 @@ err:
return -ENOMEM;
}
/* The qede lock is used to protect driver state change and driver flows that
 * are not reentrant.
 *
 * __qede_lock() takes only the internal qede mutex (edev->qede_lock).
 */
void __qede_lock(struct qede_dev *edev)
{
mutex_lock(&edev->qede_lock);
}
/* Release the internal qede mutex (edev->qede_lock). */
void __qede_unlock(struct qede_dev *edev)
{
mutex_unlock(&edev->qede_lock);
}
/* This version of the lock should be used when acquiring the RTNL lock is also
 * needed in addition to the internal qede lock.
 *
 * Lock order: the RTNL lock is taken first, then the internal qede lock.
 */
void qede_lock(struct qede_dev *edev)
{
rtnl_lock();
__qede_lock(edev);
}
/* Release both locks in the reverse order: the internal qede lock first,
 * then the RTNL lock.
 */
void qede_unlock(struct qede_dev *edev)
{
__qede_unlock(edev);
rtnl_unlock();
}
static void qede_sp_task(struct work_struct *work)
{
struct qede_dev *edev = container_of(work, struct qede_dev,
sp_task.work);
/* The locking scheme depends on the specific flag:
* In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to
* ensure that ongoing flows are ended and new ones are not started.
* In other cases - only the internal qede lock should be acquired.
*/
if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) {
#ifdef CONFIG_QED_SRIOV
/* SRIOV must be disabled outside the lock to avoid a deadlock.
* The recovery of the active VFs is currently not supported.
*/
qede_sriov_configure(edev->pdev, 0);
#endif
qede_lock(edev);
qede_recovery_handler(edev);
qede_unlock(edev);
}
__qede_lock(edev);
if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags))
@ -1031,8 +1067,13 @@ static void qede_log_probe(struct qede_dev *edev)
enum qede_probe_mode {
QEDE_PROBE_NORMAL,
QEDE_PROBE_RECOVERY,
};
/* Map a qede probe mode to the RDMA probe mode: QEDE_PROBE_NORMAL maps to
 * QEDE_RDMA_PROBE_NORMAL, any other mode to QEDE_RDMA_PROBE_RECOVERY.
 */
#define QEDE_RDMA_PROBE_MODE(mode) \
((mode) == QEDE_PROBE_NORMAL ? QEDE_RDMA_PROBE_NORMAL \
: QEDE_RDMA_PROBE_RECOVERY)
static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
bool is_vf, enum qede_probe_mode mode)
{
@ -1051,6 +1092,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
probe_params.dp_module = dp_module;
probe_params.dp_level = dp_level;
probe_params.is_vf = is_vf;
probe_params.recov_in_prog = (mode == QEDE_PROBE_RECOVERY);
cdev = qed_ops->common->probe(pdev, &probe_params);
if (!cdev) {
rc = -ENODEV;
@ -1078,11 +1120,20 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
if (rc)
goto err2;
edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
dp_level);
if (!edev) {
rc = -ENOMEM;
goto err2;
if (mode != QEDE_PROBE_RECOVERY) {
edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
dp_level);
if (!edev) {
rc = -ENOMEM;
goto err2;
}
} else {
struct net_device *ndev = pci_get_drvdata(pdev);
edev = netdev_priv(ndev);
edev->cdev = cdev;
memset(&edev->stats, 0, sizeof(edev->stats));
memcpy(&edev->dev_info, &dev_info, sizeof(dev_info));
}
if (is_vf)
@ -1090,28 +1141,31 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
qede_init_ndev(edev);
rc = qede_rdma_dev_add(edev);
rc = qede_rdma_dev_add(edev, QEDE_RDMA_PROBE_MODE(mode));
if (rc)
goto err3;
/* Prepare the lock prior to the registration of the netdev,
* as once it's registered we might reach flows requiring it
* [it's even possible to reach a flow needing it directly
* from there, although it's unlikely].
*/
INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
mutex_init(&edev->qede_lock);
rc = register_netdev(edev->ndev);
if (rc) {
DP_NOTICE(edev, "Cannot register net-device\n");
goto err4;
if (mode != QEDE_PROBE_RECOVERY) {
/* Prepare the lock prior to the registration of the netdev,
* as once it's registered we might reach flows requiring it
* [it's even possible to reach a flow needing it directly
* from there, although it's unlikely].
*/
INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
mutex_init(&edev->qede_lock);
rc = register_netdev(edev->ndev);
if (rc) {
DP_NOTICE(edev, "Cannot register net-device\n");
goto err4;
}
}
edev->ops->common->set_name(cdev, edev->ndev->name);
/* PTP not supported on VFs */
if (!is_vf)
qede_ptp_enable(edev, true);
qede_ptp_enable(edev, (mode == QEDE_PROBE_NORMAL));
edev->ops->register_ops(cdev, &qede_ll_ops, edev);
@ -1126,7 +1180,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
return 0;
err4:
qede_rdma_dev_remove(edev);
qede_rdma_dev_remove(edev, QEDE_RDMA_PROBE_MODE(mode));
err3:
free_netdev(edev->ndev);
err2:
@ -1162,8 +1216,13 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id)
/* Selects how much of the device __qede_remove() tears down. */
enum qede_remove_mode {
	/* Full removal: the netdev is unregistered and freed as well */
	QEDE_REMOVE_NORMAL = 0,
	/* Removal as part of error recovery: the netdev, its sp_task and the
	 * PCI drvdata are left in place so that a recovery probe can reuse them
	 */
	QEDE_REMOVE_RECOVERY = 1,
};
/* Map an enum qede_remove_mode value onto the matching
 * enum qede_rdma_remove_mode value: anything other than a normal removal
 * is treated as a recovery removal.
 */
#define QEDE_RDMA_REMOVE_MODE(mode) \
	((mode) != QEDE_REMOVE_NORMAL ? QEDE_RDMA_REMOVE_RECOVERY \
				      : QEDE_RDMA_REMOVE_NORMAL)
static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
{
struct net_device *ndev = pci_get_drvdata(pdev);
@ -1172,16 +1231,20 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
DP_INFO(edev, "Starting qede_remove\n");
qede_rdma_dev_remove(edev);
unregister_netdev(ndev);
cancel_delayed_work_sync(&edev->sp_task);
qede_rdma_dev_remove(edev, QEDE_RDMA_REMOVE_MODE(mode));
if (mode != QEDE_REMOVE_RECOVERY) {
unregister_netdev(ndev);
cancel_delayed_work_sync(&edev->sp_task);
edev->ops->common->set_power_state(cdev, PCI_D0);
pci_set_drvdata(pdev, NULL);
}
qede_ptp_disable(edev);
edev->ops->common->set_power_state(cdev, PCI_D0);
pci_set_drvdata(pdev, NULL);
/* Use global ops since we've freed edev */
qed_ops->common->slowpath_stop(cdev);
if (system_state == SYSTEM_POWER_OFF)
@ -1194,7 +1257,8 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
* [e.g., QED register callbacks] won't break anything when
* accessing the netdevice.
*/
free_netdev(ndev);
if (mode != QEDE_REMOVE_RECOVERY)
free_netdev(ndev);
dev_info(&pdev->dev, "Ending qede_remove successfully\n");
}
@ -1539,6 +1603,58 @@ static int qede_alloc_mem_load(struct qede_dev *edev)
return 0;
}
/* Release every packet still pending on a single Tx queue.
 *
 * Walks the queue's PBL chain until the consumer index catches up with the
 * producer index, freeing one packet per iteration via qede_free_tx_pkt().
 * The loop stops early if a packet cannot be freed. The number of packets
 * and bytes that were released is then reported to the stack through
 * netdev_tx_completed_queue().
 */
static void qede_empty_tx_queue(struct qede_dev *edev,
				struct qede_tx_queue *txq)
{
	struct netdev_queue *nq = netdev_get_tx_queue(edev->ndev,
						      txq->ndev_txq_id);
	unsigned int freed_pkts = 0, freed_bytes = 0;
	int pkt_len = 0;

	while (qed_chain_get_cons_idx(&txq->tx_pbl) !=
	       qed_chain_get_prod_idx(&txq->tx_pbl)) {
		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
			   "Freeing a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
			   txq->index, qed_chain_get_cons_idx(&txq->tx_pbl),
			   qed_chain_get_prod_idx(&txq->tx_pbl));

		if (qede_free_tx_pkt(edev, txq, &pkt_len)) {
			DP_NOTICE(edev,
				  "Failed to free a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
				  txq->index,
				  qed_chain_get_cons_idx(&txq->tx_pbl),
				  qed_chain_get_prod_idx(&txq->tx_pbl));
			break;
		}

		/* Account for the packet that was just released */
		freed_bytes += pkt_len;
		freed_pkts++;
		txq->sw_tx_cons++;
	}

	netdev_tx_completed_queue(nq, freed_pkts, freed_bytes);
}
static void qede_empty_tx_queues(struct qede_dev *edev)
{
int i;
for_each_queue(i)
if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
int cos;
for_each_cos_in_txq(edev, cos) {
struct qede_fastpath *fp;
fp = &edev->fp_array[i];
qede_empty_tx_queue(edev,
&fp->txq[cos]);
}
}
}
/* This function inits fp content and resets the SB, RXQ and TXQ structures */
static void qede_init_fp(struct qede_dev *edev)
{
@ -2053,6 +2169,7 @@ out:
/* Selects the flavour of qede_unload(). */
enum qede_unload_mode {
	/* Regular unload: state becomes CLOSED, the link is reset and the
	 * queues are stopped
	 */
	QEDE_UNLOAD_NORMAL = 0,
	/* Unload during error recovery: the state is left untouched, the link
	 * reset and queue stop are skipped, and pending Tx packets are
	 * emptied by the driver
	 */
	QEDE_UNLOAD_RECOVERY = 1,
};
static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
@ -2068,7 +2185,8 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
clear_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags);
edev->state = QEDE_STATE_CLOSED;
if (mode != QEDE_UNLOAD_RECOVERY)
edev->state = QEDE_STATE_CLOSED;
qede_rdma_dev_event_close(edev);
@ -2076,17 +2194,20 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
netif_tx_disable(edev->ndev);
netif_carrier_off(edev->ndev);
/* Reset the link */
memset(&link_params, 0, sizeof(link_params));
link_params.link_up = false;
edev->ops->common->set_link(edev->cdev, &link_params);
rc = qede_stop_queues(edev);
if (rc) {
qede_sync_free_irqs(edev);
goto out;
}
if (mode != QEDE_UNLOAD_RECOVERY) {
/* Reset the link */
memset(&link_params, 0, sizeof(link_params));
link_params.link_up = false;
edev->ops->common->set_link(edev->cdev, &link_params);
DP_INFO(edev, "Stopped Queues\n");
rc = qede_stop_queues(edev);
if (rc) {
qede_sync_free_irqs(edev);
goto out;
}
DP_INFO(edev, "Stopped Queues\n");
}
qede_vlan_mark_nonconfigured(edev);
edev->ops->fastpath_stop(edev->cdev);
@ -2102,18 +2223,26 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
qede_napi_disable_remove(edev);
if (mode == QEDE_UNLOAD_RECOVERY)
qede_empty_tx_queues(edev);
qede_free_mem_load(edev);
qede_free_fp_array(edev);
out:
if (!is_locked)
__qede_unlock(edev);
if (mode != QEDE_UNLOAD_RECOVERY)
DP_NOTICE(edev, "Link is down\n");
DP_INFO(edev, "Ending qede unload\n");
}
/* Selects the flavour of qede_load(). */
enum qede_load_mode {
	QEDE_LOAD_NORMAL = 0,
	QEDE_LOAD_RELOAD = 1,
	/* Used by qede_recovery_handler() when re-loading after a recovery */
	QEDE_LOAD_RECOVERY = 2,
};
static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
@ -2293,6 +2422,77 @@ static void qede_link_update(void *dev, struct qed_link_output *link)
}
}
/* Request that the slowpath task run the recovery handler.
 *
 * @dev is the qede device, passed as an opaque pointer (presumably because
 * this is installed as the schedule_recovery_handler callback - confirm
 * against the ops table). Nothing is scheduled when the device is already in
 * the recovery state; otherwise the QEDE_SP_RECOVERY flag is raised and
 * sp_task is queued to run without delay.
 */
static void qede_schedule_recovery_handler(void *dev)
{
	struct qede_dev *edev = dev;

	if (edev->state != QEDE_STATE_RECOVERY) {
		set_bit(QEDE_SP_RECOVERY, &edev->sp_flags);
		schedule_delayed_work(&edev->sp_task, 0);

		DP_INFO(edev, "Scheduled a recovery handler\n");
	} else {
		DP_NOTICE(edev,
			  "Avoid scheduling a recovery handling since already in recovery state\n");
	}
}
/* Final handling for a recovery attempt that could not be completed.
 *
 * Logs the failure, detaches the net device and, when a qed device is still
 * attached, moves it to the PCI_D3hot power state.
 */
static void qede_recovery_failed(struct qede_dev *edev)
{
	struct net_device *ndev = edev->ndev;

	netdev_err(ndev, "Recovery handling has failed. Power cycle is needed.\n");
	netif_device_detach(ndev);

	/* cdev is cleared by the recovery handler when the re-probe fails */
	if (!edev->cdev)
		return;

	edev->ops->common->set_power_state(edev->cdev, PCI_D3hot);
}
static void qede_recovery_handler(struct qede_dev *edev)
{
u32 curr_state = edev->state;
int rc;
DP_NOTICE(edev, "Starting a recovery process\n");
/* No need to acquire first the qede_lock since is done by qede_sp_task
* before calling this function.
*/
edev->state = QEDE_STATE_RECOVERY;
edev->ops->common->recovery_prolog(edev->cdev);
if (curr_state == QEDE_STATE_OPEN)
qede_unload(edev, QEDE_UNLOAD_RECOVERY, true);
__qede_remove(edev->pdev, QEDE_REMOVE_RECOVERY);
rc = __qede_probe(edev->pdev, edev->dp_module, edev->dp_level,
IS_VF(edev), QEDE_PROBE_RECOVERY);
if (rc) {
edev->cdev = NULL;
goto err;
}
if (curr_state == QEDE_STATE_OPEN) {
rc = qede_load(edev, QEDE_LOAD_RECOVERY, true);
if (rc)
goto err;
qede_config_rx_mode(edev->ndev);
udp_tunnel_get_rx_info(edev->ndev);
}
edev->state = curr_state;
DP_NOTICE(edev, "Recovery handling is done\n");
return;
err:
qede_recovery_failed(edev);
}
static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
{
struct netdev_queue *netdev_txq;

View File

@ -50,6 +50,8 @@ static void _qede_rdma_dev_add(struct qede_dev *edev)
if (!qedr_drv)
return;
/* Leftovers from previous error recovery */
edev->rdma_info.exp_recovery = false;
edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
edev->ndev);
}
@ -87,21 +89,26 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev)
destroy_workqueue(edev->rdma_info.rdma_wq);
}
int qede_rdma_dev_add(struct qede_dev *edev)
int qede_rdma_dev_add(struct qede_dev *edev, enum qede_rdma_probe_mode mode)
{
int rc = 0;
int rc;
if (qede_rdma_supported(edev)) {
rc = qede_rdma_create_wq(edev);
if (rc)
return rc;
if (!qede_rdma_supported(edev))
return 0;
INIT_LIST_HEAD(&edev->rdma_info.entry);
mutex_lock(&qedr_dev_list_lock);
list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
_qede_rdma_dev_add(edev);
mutex_unlock(&qedr_dev_list_lock);
}
/* Cannot start qedr while recovering since it wasn't fully stopped */
if (mode == QEDE_RDMA_PROBE_RECOVERY)
return 0;
rc = qede_rdma_create_wq(edev);
if (rc)
return rc;
INIT_LIST_HEAD(&edev->rdma_info.entry);
mutex_lock(&qedr_dev_list_lock);
list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
_qede_rdma_dev_add(edev);
mutex_unlock(&qedr_dev_list_lock);
return rc;
}
@ -110,19 +117,31 @@ static void _qede_rdma_dev_remove(struct qede_dev *edev)
{
if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
qedr_drv->remove(edev->rdma_info.qedr_dev);
edev->rdma_info.qedr_dev = NULL;
}
void qede_rdma_dev_remove(struct qede_dev *edev)
void qede_rdma_dev_remove(struct qede_dev *edev,
enum qede_rdma_remove_mode mode)
{
if (!qede_rdma_supported(edev))
return;
qede_rdma_destroy_wq(edev);
mutex_lock(&qedr_dev_list_lock);
_qede_rdma_dev_remove(edev);
list_del(&edev->rdma_info.entry);
mutex_unlock(&qedr_dev_list_lock);
/* Cannot remove qedr while recovering since it wasn't fully stopped */
if (mode == QEDE_RDMA_REMOVE_NORMAL) {
qede_rdma_destroy_wq(edev);
mutex_lock(&qedr_dev_list_lock);
if (!edev->rdma_info.exp_recovery)
_qede_rdma_dev_remove(edev);
edev->rdma_info.qedr_dev = NULL;
list_del(&edev->rdma_info.entry);
mutex_unlock(&qedr_dev_list_lock);
} else {
if (!edev->rdma_info.exp_recovery) {
mutex_lock(&qedr_dev_list_lock);
_qede_rdma_dev_remove(edev);
mutex_unlock(&qedr_dev_list_lock);
}
edev->rdma_info.exp_recovery = true;
}
}
static void _qede_rdma_dev_open(struct qede_dev *edev)
@ -204,7 +223,8 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv)
mutex_lock(&qedr_dev_list_lock);
list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
if (edev->rdma_info.qedr_dev)
/* If device has experienced recovery it was already removed */
if (edev->rdma_info.qedr_dev && !edev->rdma_info.exp_recovery)
_qede_rdma_dev_remove(edev);
}
qedr_drv = NULL;
@ -284,6 +304,10 @@ static void qede_rdma_add_event(struct qede_dev *edev,
{
struct qede_rdma_event_work *event_node;
/* If a recovery was experienced avoid adding the event */
if (edev->rdma_info.exp_recovery)
return;
if (!edev->rdma_info.qedr_dev)
return;

View File

@ -764,6 +764,7 @@ struct qed_probe_params {
u32 dp_module;
u8 dp_level;
bool is_vf;
bool recov_in_prog;
};
#define QED_DRV_VER_STR_SIZE 12
@ -810,6 +811,7 @@ struct qed_common_cb_ops {
void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc);
void (*link_update)(void *dev,
struct qed_link_output *link);
void (*schedule_recovery_handler)(void *dev);
void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
void (*get_protocol_tlv_data)(void *dev, void *data);
@ -1057,6 +1059,24 @@ struct qed_common_ops {
int (*db_recovery_del)(struct qed_dev *cdev,
void __iomem *db_addr, void *db_data);
/**
* @brief recovery_process - Trigger a recovery process
*
* @param cdev
*
* @return 0 on success, error otherwise.
*/
int (*recovery_process)(struct qed_dev *cdev);
/**
* @brief recovery_prolog - Execute the prolog operations of a recovery process
*
* @param cdev
*
* @return 0 on success, error otherwise.
*/
int (*recovery_prolog)(struct qed_dev *cdev);
/**
* @brief update_drv_state - API to inform the change in the driver state.
*

View File

@ -55,6 +55,16 @@ struct qede_rdma_event_work {
enum qede_rdma_event event;
};
/* Selects the flavour of qede_rdma_dev_add(). */
enum qede_rdma_probe_mode {
	/* Regular probe */
	QEDE_RDMA_PROBE_NORMAL = 0,
	/* Probe performed as part of an error recovery */
	QEDE_RDMA_PROBE_RECOVERY = 1,
};
/* Selects the flavour of qede_rdma_dev_remove(). */
enum qede_rdma_remove_mode {
	/* Regular removal */
	QEDE_RDMA_REMOVE_NORMAL = 0,
	/* Removal performed as part of an error recovery */
	QEDE_RDMA_REMOVE_RECOVERY = 1,
};
struct qedr_driver {
unsigned char name[32];
@ -74,21 +84,24 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv);
bool qede_rdma_supported(struct qede_dev *dev);
#if IS_ENABLED(CONFIG_QED_RDMA)
int qede_rdma_dev_add(struct qede_dev *dev);
int qede_rdma_dev_add(struct qede_dev *dev, enum qede_rdma_probe_mode mode);
void qede_rdma_dev_event_open(struct qede_dev *dev);
void qede_rdma_dev_event_close(struct qede_dev *dev);
void qede_rdma_dev_remove(struct qede_dev *dev);
void qede_rdma_dev_remove(struct qede_dev *dev,
enum qede_rdma_remove_mode mode);
void qede_rdma_event_changeaddr(struct qede_dev *edr);
#else
static inline int qede_rdma_dev_add(struct qede_dev *dev)
static inline int qede_rdma_dev_add(struct qede_dev *dev,
enum qede_rdma_probe_mode mode)
{
return 0;
}
static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {}
static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {}
static inline void qede_rdma_dev_remove(struct qede_dev *dev) {}
static inline void qede_rdma_dev_remove(struct qede_dev *dev,
enum qede_rdma_remove_mode mode) {}
static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {}
#endif
#endif