IPoIB: Handle P_Key table reordering

SM reconfiguration or failover possibly causes a shuffling of the values
in the P_Key table. Right now, IPoIB only queries for the P_Key index
once when it creates the device QP, and hence there are problems if the
index of a P_Key value changes.  Fix this by using the PKEY_CHANGE event
to trigger a recheck of the P_Key index.

Signed-off-by: Yosef Etigin <yosefe@voltaire.com>
Acked-by: Michael S. Tsirkin <mst@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
Yosef Etigin 2007-05-19 08:51:54 -07:00 committed by Roland Dreier
parent 1af4c435f3
commit 26bbf13ce1
4 changed files with 92 additions and 45 deletions

View file

@ -201,15 +201,17 @@ struct ipoib_dev_priv {
struct list_head multicast_list;
struct rb_root multicast_tree;
struct delayed_work pkey_task;
struct delayed_work pkey_poll_task;
struct delayed_work mcast_task;
struct work_struct flush_task;
struct work_struct restart_task;
struct delayed_work ah_reap_task;
struct work_struct pkey_event_task;
struct ib_device *ca;
u8 port;
u16 pkey;
u16 pkey_index;
struct ib_pd *pd;
struct ib_mr *mr;
struct ib_cq *cq;
@ -333,12 +335,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_ib_dev_flush(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
int ipoib_ib_dev_up(struct net_device *dev);
int ipoib_ib_dev_down(struct net_device *dev, int flush);
int ipoib_ib_dev_stop(struct net_device *dev);
int ipoib_ib_dev_stop(struct net_device *dev, int flush);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev);

View file

@ -448,6 +448,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
return -1;
}
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = ipoib_init_qp(dev);
if (ret) {
ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@ -457,14 +464,14 @@ int ipoib_ib_dev_open(struct net_device *dev)
ret = ipoib_ib_post_receives(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
ipoib_ib_dev_stop(dev);
ipoib_ib_dev_stop(dev, 1);
return -1;
}
ret = ipoib_cm_dev_open(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
ipoib_ib_dev_stop(dev);
ipoib_ib_dev_stop(dev, 1);
return -1;
}
@ -516,7 +523,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
mutex_lock(&pkey_mutex);
set_bit(IPOIB_PKEY_STOP, &priv->flags);
cancel_delayed_work(&priv->pkey_task);
cancel_delayed_work(&priv->pkey_poll_task);
mutex_unlock(&pkey_mutex);
if (flush)
flush_workqueue(ipoib_workqueue);
@ -543,7 +550,7 @@ static int recvs_pending(struct net_device *dev)
return pending;
}
int ipoib_ib_dev_stop(struct net_device *dev)
int ipoib_ib_dev_stop(struct net_device *dev, int flush)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr;
@ -629,7 +636,8 @@ timeout:
/* Wait for all AHs to be reaped */
set_bit(IPOIB_STOP_REAPER, &priv->flags);
cancel_delayed_work(&priv->ah_reap_task);
flush_workqueue(ipoib_workqueue);
if (flush)
flush_workqueue(ipoib_workqueue);
begin = jiffies;
@ -673,13 +681,24 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
return 0;
}
void ipoib_ib_dev_flush(struct work_struct *work)
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
{
struct ipoib_dev_priv *cpriv, *priv =
container_of(work, struct ipoib_dev_priv, flush_task);
struct ipoib_dev_priv *cpriv;
struct net_device *dev = priv->dev;
u16 new_index;
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
mutex_lock(&priv->vlan_mutex);
/*
* Flush any child interfaces too -- they might be up even if
* the parent is down.
*/
list_for_each_entry(cpriv, &priv->child_intfs, list)
__ipoib_ib_dev_flush(cpriv, pkey_event);
mutex_unlock(&priv->vlan_mutex);
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return;
}
@ -689,10 +708,32 @@ void ipoib_ib_dev_flush(struct work_struct *work)
return;
}
if (pkey_event) {
if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ipoib_ib_dev_down(dev, 0);
ipoib_pkey_dev_delay_open(dev);
return;
}
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
/* restart QP only if P_Key index is changed */
if (new_index == priv->pkey_index) {
ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
return;
}
priv->pkey_index = new_index;
}
ipoib_dbg(priv, "flushing\n");
ipoib_ib_dev_down(dev, 0);
if (pkey_event) {
ipoib_ib_dev_stop(dev, 0);
ipoib_ib_dev_open(dev);
}
/*
* The device could have been brought down between the start and when
* we get here, don't bring it back up if it's not configured up
@ -701,14 +742,24 @@ void ipoib_ib_dev_flush(struct work_struct *work)
ipoib_ib_dev_up(dev);
ipoib_mcast_restart_task(&priv->restart_task);
}
}
mutex_lock(&priv->vlan_mutex);
void ipoib_ib_dev_flush(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, flush_task);
/* Flush any child interfaces too */
list_for_each_entry(cpriv, &priv->child_intfs, list)
ipoib_ib_dev_flush(&cpriv->flush_task);
ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
__ipoib_ib_dev_flush(priv, 0);
}
mutex_unlock(&priv->vlan_mutex);
void ipoib_pkey_event(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, pkey_event_task);
ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
__ipoib_ib_dev_flush(priv, 1);
}
void ipoib_ib_dev_cleanup(struct net_device *dev)
@ -736,7 +787,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
void ipoib_pkey_poll(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, pkey_task.work);
container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
struct net_device *dev = priv->dev;
ipoib_pkey_dev_check_presence(dev);
@ -747,7 +798,7 @@ void ipoib_pkey_poll(struct work_struct *work)
mutex_lock(&pkey_mutex);
if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
queue_delayed_work(ipoib_workqueue,
&priv->pkey_task,
&priv->pkey_poll_task,
HZ);
mutex_unlock(&pkey_mutex);
}
@ -766,7 +817,7 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev)
mutex_lock(&pkey_mutex);
clear_bit(IPOIB_PKEY_STOP, &priv->flags);
queue_delayed_work(ipoib_workqueue,
&priv->pkey_task,
&priv->pkey_poll_task,
HZ);
mutex_unlock(&pkey_mutex);
return 1;

View file

@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev)
return -EINVAL;
if (ipoib_ib_dev_up(dev)) {
ipoib_ib_dev_stop(dev);
ipoib_ib_dev_stop(dev, 1);
return -EINVAL;
}
@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device *dev)
flush_workqueue(ipoib_workqueue);
ipoib_ib_dev_down(dev, 1);
ipoib_ib_dev_stop(dev);
ipoib_ib_dev_stop(dev, 1);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@ -988,7 +988,8 @@ static void ipoib_setup(struct net_device *dev)
INIT_LIST_HEAD(&priv->dead_ahs);
INIT_LIST_HEAD(&priv->multicast_list);
INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll);
INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);

View file

@ -33,8 +33,6 @@
* $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <rdma/ib_cache.h>
#include "ipoib.h"
int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
if (!qp_attr)
goto out;
if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = -ENXIO;
goto out;
@ -94,26 +92,16 @@ int ipoib_init_qp(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
u16 pkey_index;
struct ib_qp_attr qp_attr;
int attr_mask;
/*
* Search through the port P_Key table for the requested pkey value.
* The port has to be assigned to the respective IB partition in
* advance.
*/
ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
if (ret) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
return ret;
}
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return -1;
qp_attr.qp_state = IB_QPS_INIT;
qp_attr.qkey = 0;
qp_attr.port_num = priv->port;
qp_attr.pkey_index = pkey_index;
qp_attr.pkey_index = priv->pkey_index;
attr_mask =
IB_QP_QKEY |
IB_QP_PORT |
@ -259,14 +247,18 @@ void ipoib_event(struct ib_event_handler *handler,
struct ipoib_dev_priv *priv =
container_of(handler, struct ipoib_dev_priv, event_handler);
if ((record->event == IB_EVENT_PORT_ERR ||
record->event == IB_EVENT_PKEY_CHANGE ||
record->event == IB_EVENT_PORT_ACTIVE ||
record->event == IB_EVENT_LID_CHANGE ||
record->event == IB_EVENT_SM_CHANGE ||
record->event == IB_EVENT_CLIENT_REREGISTER) &&
record->element.port_num == priv->port) {
if (record->element.port_num != priv->port)
return;
if (record->event == IB_EVENT_PORT_ERR ||
record->event == IB_EVENT_PORT_ACTIVE ||
record->event == IB_EVENT_LID_CHANGE ||
record->event == IB_EVENT_SM_CHANGE ||
record->event == IB_EVENT_CLIENT_REREGISTER) {
ipoib_dbg(priv, "Port state change event\n");
queue_work(ipoib_workqueue, &priv->flush_task);
} else if (record->event == IB_EVENT_PKEY_CHANGE) {
ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port);
queue_work(ipoib_workqueue, &priv->pkey_event_task);
}
}