remarkable-linux/include/scsi/scsi_transport_srp.h
Bart Van Assche 93079162bf scsi_transport_srp: Fix a race condition
The rport timers must be stopped before the SRP initiator destroys the
resources associated with the SCSI host. This is necessary because
otherwise the callback functions invoked from the SRP transport layer
could trigger a use-after-free. Stopping the rport timers before
invoking scsi_remove_host() can trigger long delays in the SCSI error
handler if a transport layer failure occurs while scsi_remove_host()
is in progress. Hence move the code for stopping the rport timers from
srp_rport_release() into a new function and invoke that function after
scsi_remove_host() has finished. This patch fixes the following
sporadic kernel crash:

     kernel BUG at include/asm-generic/dma-mapping-common.h:64!
     invalid opcode: 0000 [#1] SMP
     RIP: 0010:[<ffffffffa03b20b1>]  [<ffffffffa03b20b1>] srp_unmap_data+0x121/0x130 [ib_srp]
     Call Trace:
     [<ffffffffa03b20fc>] srp_free_req+0x3c/0x80 [ib_srp]
     [<ffffffffa03b2188>] srp_finish_req+0x48/0x70 [ib_srp]
     [<ffffffffa03b21fb>] srp_terminate_io+0x4b/0x60 [ib_srp]
     [<ffffffffa03a6fb5>] __rport_fail_io_fast+0x75/0x80 [scsi_transport_srp]
     [<ffffffffa03a7438>] rport_fast_io_fail_timedout+0x88/0xc0 [scsi_transport_srp]
     [<ffffffff8108b370>] worker_thread+0x170/0x2a0
     [<ffffffff81090876>] kthread+0x96/0xa0
     [<ffffffff8100c0ca>] child_rip+0xa/0x20

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2014-01-21 10:46:17 -08:00

127 lines
3.8 KiB
C

#ifndef SCSI_TRANSPORT_SRP_H
#define SCSI_TRANSPORT_SRP_H
#include <linux/transport_class.h>
#include <linux/types.h>
#include <linux/mutex.h>
/*
 * Role constants for an SRP port. Presumably these are the values stored in
 * the `roles` member of struct srp_rport_identifiers and struct srp_rport -
 * TODO confirm against the transport class implementation.
 */
#define SRP_RPORT_ROLE_INITIATOR 0
#define SRP_RPORT_ROLE_TARGET 1
/**
 * struct srp_rport_identifiers - port identification passed to srp_rport_add()
 * @port_id: 16-byte port identifier.
 * @roles: Port role; presumably one of the SRP_RPORT_ROLE_* constants -
 *	TODO confirm.
 */
struct srp_rport_identifiers {
u8 port_id[16];
u8 roles;
};
/**
 * enum srp_rport_state - state of the SRP transport layer for an rport
 * @SRP_RPORT_RUNNING:   The transport layer is operational.
 * @SRP_RPORT_BLOCKED:   The transport layer is not operational; I/O has been
 *                       blocked and the fast I/O fail timer is running.
 * @SRP_RPORT_FAIL_FAST: The fast I/O fail timer has expired; I/O is failed
 *                       fast.
 * @SRP_RPORT_LOST:      The port is being removed.
 */
enum srp_rport_state {
	SRP_RPORT_RUNNING   = 0,
	SRP_RPORT_BLOCKED   = 1,
	SRP_RPORT_FAIL_FAST = 2,
	SRP_RPORT_LOST      = 3,
};
/**
 * struct srp_rport - SRP rport object
 * @dev: Embedded struct device for this rport.
 * @port_id: 16-byte port identifier.
 * @roles: Port role; presumably one of the SRP_RPORT_ROLE_* constants -
 *	TODO confirm.
 * @lld_data: LLD private data.
 * @mutex: Protects against concurrent rport reconnect / fast_io_fail /
 *	dev_loss_tmo activity.
 * @state: Transport layer state; see enum srp_rport_state.
 * @reconnect_delay: Reconnect delay. The unit is not visible in this header
 *	(presumably seconds - TODO confirm).
 * @failed_reconnects: Presumably the number of failed reconnect attempts -
 *	TODO confirm.
 * @reconnect_work: Delayed work item; presumably used to schedule reconnect
 *	attempts - TODO confirm.
 * @fast_io_fail_tmo: Fast I/O fail timeout. The unit is not visible in this
 *	header (presumably seconds - TODO confirm).
 * @dev_loss_tmo: Device loss timeout. The unit is not visible in this header
 *	(presumably seconds - TODO confirm).
 * @fast_io_fail_work: Delayed work item; presumably backs the fast I/O fail
 *	timer - TODO confirm.
 * @dev_loss_work: Delayed work item; presumably backs the device loss timer -
 *	TODO confirm.
 */
struct srp_rport {
/* for initiator and target drivers */
struct device dev;
u8 port_id[16];
u8 roles;
/* for initiator drivers */
void *lld_data;
struct mutex mutex;
enum srp_rport_state state;
int reconnect_delay;
int failed_reconnects;
struct delayed_work reconnect_work;
int fast_io_fail_tmo;
int dev_loss_tmo;
struct delayed_work fast_io_fail_work;
struct delayed_work dev_loss_work;
};
/**
 * struct srp_function_template - driver-supplied settings and callbacks
 *	handed to srp_attach_transport()
 * @has_rport_state: Whether or not to create the state, fast_io_fail_tmo and
 *	dev_loss_tmo sysfs attribute for an rport.
 * @reset_timer_if_blocked: Whether or not srp_timed_out() should reset the
 *	command timer if the device on which it has been queued is blocked.
 * @reconnect_delay: If not NULL, points to the default reconnect_delay value.
 * @fast_io_fail_tmo: If not NULL, points to the default fast_io_fail_tmo
 *	value.
 * @dev_loss_tmo: If not NULL, points to the default dev_loss_tmo value.
 * @reconnect: Callback function for reconnecting to the target. See also
 *	srp_reconnect_rport().
 * @terminate_rport_io: Callback function for terminating all outstanding I/O
 *	requests for an rport.
 * @rport_delete: Callback that receives an rport; presumably asks the LLD to
 *	delete that rport - TODO confirm against the transport class.
 * @tsk_mgmt_response: Target-driver callback; its semantics are not visible
 *	in this header - TODO document from the implementation.
 * @it_nexus_response: Target-driver callback; its semantics are not visible
 *	in this header - TODO document from the implementation.
 */
struct srp_function_template {
/* for initiator drivers */
bool has_rport_state;
bool reset_timer_if_blocked;
int *reconnect_delay;
int *fast_io_fail_tmo;
int *dev_loss_tmo;
int (*reconnect)(struct srp_rport *rport);
void (*terminate_rport_io)(struct srp_rport *rport);
void (*rport_delete)(struct srp_rport *rport);
/* for target drivers */
int (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int);
int (* it_nexus_response)(struct Scsi_Host *, u64, int);
};
/*
 * Functions exported by the SRP transport class. Only the prototypes are
 * visible in this header; see the implementation for the exact contracts.
 */
extern struct scsi_transport_template *
srp_attach_transport(struct srp_function_template *);
extern void srp_release_transport(struct scsi_transport_template *);
extern void srp_rport_get(struct srp_rport *rport);
extern void srp_rport_put(struct srp_rport *rport);
extern struct srp_rport *srp_rport_add(struct Scsi_Host *,
struct srp_rport_identifiers *);
extern void srp_rport_del(struct srp_rport *);
extern int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo,
int dev_loss_tmo);
extern int srp_reconnect_rport(struct srp_rport *rport);
extern void srp_start_tl_fail_timers(struct srp_rport *rport);
extern void srp_remove_host(struct Scsi_Host *);
/*
 * Stops the rport timers. An initiator driver must call this after
 * scsi_remove_host() has finished and before it destroys the resources
 * associated with the SCSI host; otherwise callbacks invoked from the SRP
 * transport layer could trigger a use-after-free.
 */
extern void srp_stop_rport_timers(struct srp_rport *rport);
/**
 * srp_chkready() - map the rport transport state to a SCSI result code
 * @rport: rport whose state is inspected.
 *
 * The value produced here can be returned by an LLD's queuecommand()
 * implementation before it issues I/O. This helper plays the same role as
 * fc_remote_port_chkready().
 *
 * Return: DID_TRANSPORT_FAILFAST << 16 when the state is SRP_RPORT_FAIL_FAST,
 * DID_NO_CONNECT << 16 when the state is SRP_RPORT_LOST, and 0 for every
 * other state (SRP_RPORT_RUNNING, SRP_RPORT_BLOCKED or an unknown value).
 */
static inline int srp_chkready(struct srp_rport *rport)
{
	int result = 0;

	/* The state is sampled exactly once, by the switch expression. */
	switch (rport->state) {
	case SRP_RPORT_FAIL_FAST:
		result = DID_TRANSPORT_FAILFAST << 16;
		break;
	case SRP_RPORT_LOST:
		result = DID_NO_CONNECT << 16;
		break;
	default:
		/* Running, blocked and unrecognised states all let I/O through. */
		break;
	}

	return result;
}
#endif