greybus: svc_watchdog: Add sysfs file to change the behavior of bite

Currently, the AP performs unipro_reset if the SVC fails to respond to its
ping. While this error recovery is best suited for the end-user
experience, errors in the UniPro network could potentially go unnoticed
by the QA and fishfooders in the development phase of the project. This
patch adds an option to trigger a kernel panic so logs can be collected
for analysis.

Testing Done:
 - Reproduce issue and observe kernel panic when
   watchdog_action is changed to 'panic'

Signed-off-by: David Lin <dtwlin@google.com>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
This commit is contained in:
David Lin 2016-07-26 16:27:28 -07:00 committed by Greg Kroah-Hartman
parent 6136cce89c
commit 7c4a0edb38
4 changed files with 66 additions and 8 deletions

View file

@ -257,3 +257,19 @@ Contact: Greg Kroah-Hartman <greg@kroah.com>
Description:
If the SVC watchdog is enabled or not. Writing 0 to this
file will disable the watchdog, writing 1 will enable it.
What: /sys/bus/greybus/devices/N-svc/watchdog_action
Date: July 2016
KernelVersion: 4.XX
Contact: Greg Kroah-Hartman <greg@kroah.com>
Description:
This attribute indicates the action to be performed upon an SVC
watchdog bite.
The action is either "reset" or "panic". Writing one of these
values changes the behavior on an SVC watchdog bite. The default
value is "reset".
"reset" means the UniPro subsystem is to be reset.
"panic" means an SVC watchdog bite will cause the kernel to panic.

View file

@ -100,6 +100,36 @@ static ssize_t watchdog_store(struct device *dev,
}
static DEVICE_ATTR_RW(watchdog);
/*
 * sysfs read handler for the "watchdog_action" attribute.
 *
 * Writes the name of the currently selected SVC watchdog bite action
 * ("panic" or "reset", newline-terminated) into @buf and returns the
 * character count reported by sprintf(). Returns -EINVAL when
 * svc->action holds neither of the two known values.
 */
static ssize_t watchdog_action_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct gb_svc *svc = to_gb_svc(dev);

	switch (svc->action) {
	case GB_SVC_WATCHDOG_BITE_PANIC_KERNEL:
		return sprintf(buf, "panic\n");
	case GB_SVC_WATCHDOG_BITE_RESET_UNIPRO:
		return sprintf(buf, "reset\n");
	default:
		/* Unknown action value: nothing sensible to report. */
		return -EINVAL;
	}
}
/*
 * sysfs write handler for the "watchdog_action" attribute.
 *
 * Accepts "panic" or "reset" (compared with sysfs_streq()) and records
 * the matching bite action in svc->action. Returns @len when the input
 * was accepted, or -EINVAL for any other string, in which case
 * svc->action is left untouched.
 */
static ssize_t watchdog_action_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t len)
{
	struct gb_svc *svc = to_gb_svc(dev);

	if (sysfs_streq(buf, "panic")) {
		svc->action = GB_SVC_WATCHDOG_BITE_PANIC_KERNEL;
		return len;
	}

	if (sysfs_streq(buf, "reset")) {
		svc->action = GB_SVC_WATCHDOG_BITE_RESET_UNIPRO;
		return len;
	}

	/* Anything other than the two known keywords is rejected. */
	return -EINVAL;
}
static DEVICE_ATTR_RW(watchdog_action);
static int gb_svc_pwrmon_rail_count_get(struct gb_svc *svc, u8 *value)
{
struct gb_svc_pwrmon_rail_count_get_response response;
@ -222,6 +252,7 @@ static struct attribute *svc_attrs[] = {
&dev_attr_ap_intf_id.attr,
&dev_attr_intf_eject.attr,
&dev_attr_watchdog.attr,
&dev_attr_watchdog_action.attr,
NULL,
};
ATTRIBUTE_GROUPS(svc);

View file

@ -20,6 +20,11 @@ enum gb_svc_state {
GB_SVC_STATE_SVC_HELLO,
};
/*
 * Action taken when the SVC watchdog bites.
 *
 * RESET_UNIPRO is pinned to 0. The sysfs ABI text documents "reset" as
 * the default action, presumably because struct gb_svc starts out
 * zero-initialized -- TODO confirm against the allocation site.
 */
enum gb_svc_watchdog_bite {
	/* Reset the UniPro subsystem. */
	GB_SVC_WATCHDOG_BITE_RESET_UNIPRO = 0,
	/* Panic the kernel. */
	GB_SVC_WATCHDOG_BITE_PANIC_KERNEL = 1,
};
struct gb_svc_watchdog;
struct svc_debugfs_pwrmon_rail {
@ -43,6 +48,7 @@ struct gb_svc {
u8 protocol_minor;
struct gb_svc_watchdog *watchdog;
enum gb_svc_watchdog_bite action;
struct dentry *debugfs_dentry;
struct svc_debugfs_pwrmon_rail *pwrmon_rails;

View file

@ -83,16 +83,21 @@ static void do_work(struct work_struct *work)
dev_err(&svc->dev,
"SVC ping has returned %d, something is wrong!!!\n",
retval);
dev_err(&svc->dev, "Resetting the greybus network, watch out!!!\n");
INIT_DELAYED_WORK(&reset_work, greybus_reset);
queue_delayed_work(system_wq, &reset_work, HZ/2);
if (svc->action == GB_SVC_WATCHDOG_BITE_PANIC_KERNEL) {
panic("SVC is not responding\n");
} else if (svc->action == GB_SVC_WATCHDOG_BITE_RESET_UNIPRO) {
dev_err(&svc->dev, "Resetting the greybus network, watch out!!!\n");
/*
* Disable ourselves, we don't want to trip again unless
* userspace wants us to.
*/
watchdog->enabled = false;
INIT_DELAYED_WORK(&reset_work, greybus_reset);
queue_delayed_work(system_wq, &reset_work, HZ / 2);
/*
* Disable ourselves, we don't want to trip again unless
* userspace wants us to.
*/
watchdog->enabled = false;
}
}
/* resubmit our work to happen again, if we are still "alive" */