ocfs2_dlm: Add timeout to dlm join domain
Currently the ocfs2 DLM has no timeout when joining a DLM domain. This is not a problem in normal operation, but it becomes an issue if, for example, another node refuses to let this node join the domain because of a stuck recovery; the join then retries indefinitely. This patch adds a 90-second timeout, after which the join attempt is abandoned with -ERESTARTSYS. Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com> Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
This commit is contained in:
parent
e4968476a9
commit
0dd82141b2
|
@ -1264,6 +1264,8 @@ bail:
|
||||||
static int dlm_join_domain(struct dlm_ctxt *dlm)
|
static int dlm_join_domain(struct dlm_ctxt *dlm)
|
||||||
{
|
{
|
||||||
int status;
|
int status;
|
||||||
|
unsigned int backoff;
|
||||||
|
unsigned int total_backoff = 0;
|
||||||
|
|
||||||
BUG_ON(!dlm);
|
BUG_ON(!dlm);
|
||||||
|
|
||||||
|
@ -1295,18 +1297,27 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
|
||||||
}
|
}
|
||||||
|
|
||||||
do {
|
do {
|
||||||
unsigned int backoff;
|
|
||||||
status = dlm_try_to_join_domain(dlm);
|
status = dlm_try_to_join_domain(dlm);
|
||||||
|
|
||||||
/* If we're racing another node to the join, then we
|
/* If we're racing another node to the join, then we
|
||||||
* need to back off temporarily and let them
|
* need to back off temporarily and let them
|
||||||
* complete. */
|
* complete. */
|
||||||
|
#define DLM_JOIN_TIMEOUT_MSECS 90000
|
||||||
if (status == -EAGAIN) {
|
if (status == -EAGAIN) {
|
||||||
if (signal_pending(current)) {
|
if (signal_pending(current)) {
|
||||||
status = -ERESTARTSYS;
|
status = -ERESTARTSYS;
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (total_backoff >
|
||||||
|
msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
|
||||||
|
status = -ERESTARTSYS;
|
||||||
|
mlog(ML_NOTICE, "Timed out joining dlm domain "
|
||||||
|
"%s after %u msecs\n", dlm->name,
|
||||||
|
jiffies_to_msecs(total_backoff));
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* <chip> After you!
|
* <chip> After you!
|
||||||
* <dale> No, after you!
|
* <dale> No, after you!
|
||||||
|
@ -1316,6 +1327,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
|
||||||
*/
|
*/
|
||||||
backoff = (unsigned int)(jiffies & 0x3);
|
backoff = (unsigned int)(jiffies & 0x3);
|
||||||
backoff *= DLM_DOMAIN_BACKOFF_MS;
|
backoff *= DLM_DOMAIN_BACKOFF_MS;
|
||||||
|
total_backoff += backoff;
|
||||||
mlog(0, "backoff %d\n", backoff);
|
mlog(0, "backoff %d\n", backoff);
|
||||||
msleep(backoff);
|
msleep(backoff);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue