1
0
Fork 0
alistair23-linux/fs/gfs2/sys.c

732 lines
18 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/sched.h>
#include <linux/cred.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/module.h>
#include <linux/kobject.h>
#include <linux/uaccess.h>
#include <linux/gfs2_ondisk.h>
#include <linux/genhd.h>
#include "gfs2.h"
#include "incore.h"
#include "sys.h"
#include "super.h"
#include "glock.h"
#include "quota.h"
#include "util.h"
#include "glops.h"
#include "recovery.h"
struct gfs2_attr {
struct attribute attr;
ssize_t (*show)(struct gfs2_sbd *, char *);
ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
};
static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
return a->show ? a->show(sdp, buf) : 0;
}
static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
const char *buf, size_t len)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
return a->store ? a->store(sdp, buf, len) : len;
}
static const struct sysfs_ops gfs2_attr_ops = {
.show = gfs2_attr_show,
.store = gfs2_attr_store,
};
static struct kset *gfs2_kset;
static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%u:%u\n",
MAJOR(sdp->sd_vfs->s_dev), MINOR(sdp->sd_vfs->s_dev));
}
static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
}
static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
{
struct super_block *s = sdp->sd_vfs;
buf[0] = '\0';
if (uuid_is_null(&s->s_uuid))
return 0;
return snprintf(buf, PAGE_SIZE, "%pUB\n", &s->s_uuid);
}
static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
{
struct super_block *sb = sdp->sd_vfs;
int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1;
return snprintf(buf, PAGE_SIZE, "%d\n", frozen);
}
static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
int error, n;
error = kstrtoint(buf, 0, &n);
if (error)
return error;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
switch (n) {
case 0:
error = thaw_super(sdp->sd_vfs);
break;
case 1:
error = freeze_super(sdp->sd_vfs);
break;
default:
return -EINVAL;
}
if (error) {
fs_warn(sdp, "freeze %d error %d\n", n, error);
return error;
}
return len;
}
static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
{
unsigned int b = test_bit(SDF_WITHDRAWN, &sdp->sd_flags);
return snprintf(buf, PAGE_SIZE, "%u\n", b);
}
static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
int error, val;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
error = kstrtoint(buf, 0, &val);
if (error)
return error;
if (val != 1)
return -EINVAL;
gfs2_lm_withdraw(sdp, "withdrawing from cluster at user's request\n");
return len;
}
static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
size_t len)
{
int error, val;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
error = kstrtoint(buf, 0, &val);
if (error)
return error;
if (val != 1)
return -EINVAL;
gfs2_statfs_sync(sdp->sd_vfs, 0);
return len;
}
static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
size_t len)
{
int error, val;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
error = kstrtoint(buf, 0, &val);
if (error)
return error;
if (val != 1)
return -EINVAL;
gfs2_quota_sync(sdp->sd_vfs, 0);
return len;
}
static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
size_t len)
{
struct kqid qid;
int error;
u32 id;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
error = kstrtou32(buf, 0, &id);
if (error)
return error;
qid = make_kqid(current_user_ns(), USRQUOTA, id);
if (!qid_valid(qid))
return -EINVAL;
error = gfs2_quota_refresh(sdp, qid);
return error ? error : len;
}
static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
size_t len)
{
struct kqid qid;
int error;
u32 id;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
error = kstrtou32(buf, 0, &id);
if (error)
return error;
qid = make_kqid(current_user_ns(), GRPQUOTA, id);
if (!qid_valid(qid))
return -EINVAL;
error = gfs2_quota_refresh(sdp, qid);
return error ? error : len;
}
static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
struct gfs2_glock *gl;
const struct gfs2_glock_operations *glops;
unsigned int glmode;
unsigned int gltype;
unsigned long long glnum;
char mode[16];
int rv;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
rv = sscanf(buf, "%u:%llu %15s", &gltype, &glnum,
mode);
if (rv != 3)
return -EINVAL;
if (strcmp(mode, "EX") == 0)
glmode = LM_ST_UNLOCKED;
else if ((strcmp(mode, "CW") == 0) || (strcmp(mode, "DF") == 0))
glmode = LM_ST_DEFERRED;
else if ((strcmp(mode, "PR") == 0) || (strcmp(mode, "SH") == 0))
glmode = LM_ST_SHARED;
else
return -EINVAL;
if (gltype > LM_TYPE_JOURNAL)
return -EINVAL;
GFS2: remove transaction glock GFS2 has a transaction glock, which must be grabbed for every transaction, whose purpose is to deal with freezing the filesystem. Aside from this involving a large amount of locking, it is very easy to make the current fsfreeze code hang on unfreezing. This patch rewrites how gfs2 handles freezing the filesystem. The transaction glock is removed. In it's place is a freeze glock, which is cached (but not held) in a shared state by every node in the cluster when the filesystem is mounted. This lock only needs to be grabbed on freezing, and actions which need to be safe from freezing, like recovery. When a node wants to freeze the filesystem, it grabs this glock exclusively. When the freeze glock state changes on the nodes (either from shared to unlocked, or shared to exclusive), the filesystem does a special log flush. gfs2_log_flush() does all the work for flushing out the and shutting down the incore log, and then it tries to grab the freeze glock in a shared state again. Since the filesystem is stuck in gfs2_log_flush, no new transaction can start, and nothing can be written to disk. Unfreezing the filesytem simply involes dropping the freeze glock, allowing gfs2_log_flush() to grab and then release the shared lock, so it is cached for next time. However, in order for the unfreezing ioctl to occur, gfs2 needs to get a shared lock on the filesystem root directory inode to check permissions. If that glock has already been grabbed exclusively, fsfreeze will be unable to get the shared lock and unfreeze the filesystem. In order to allow the unfreeze, this patch makes gfs2 grab a shared lock on the filesystem root directory during the freeze, and hold it until it unfreezes the filesystem. The functions which need to grab a shared lock in order to allow the unfreeze ioctl to be issued now use the lock grabbed by the freeze code instead. The freeze and unfreeze code take care to make sure that this shared lock will not be dropped while another process is using it. Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
2014-05-01 21:26:55 -06:00
if (gltype == LM_TYPE_NONDISK && glnum == GFS2_FREEZE_LOCK)
glops = &gfs2_freeze_glops;
else
glops = gfs2_glops_list[gltype];
if (glops == NULL)
return -EINVAL;
if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags))
fs_info(sdp, "demote interface used\n");
rv = gfs2_glock_get(sdp, glnum, glops, 0, &gl);
if (rv)
return rv;
gfs2_glock_cb(gl, glmode);
gfs2_glock_put(gl);
return len;
}
#define GFS2_ATTR(name, mode, show, store) \
static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
GFS2_ATTR(id, 0444, id_show, NULL);
GFS2_ATTR(fsname, 0444, fsname_show, NULL);
GFS2_ATTR(uuid, 0444, uuid_show, NULL);
GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);
GFS2_ATTR(demote_rq, 0200, NULL, demote_rq_store);
static struct attribute *gfs2_attrs[] = {
&gfs2_attr_id.attr,
&gfs2_attr_fsname.attr,
&gfs2_attr_uuid.attr,
&gfs2_attr_freeze.attr,
&gfs2_attr_withdraw.attr,
&gfs2_attr_statfs_sync.attr,
&gfs2_attr_quota_sync.attr,
&gfs2_attr_quota_refresh_user.attr,
&gfs2_attr_quota_refresh_group.attr,
&gfs2_attr_demote_rq.attr,
NULL,
};
ATTRIBUTE_GROUPS(gfs2);
static void gfs2_sbd_release(struct kobject *kobj)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
gfs2: use-after-free in sysfs deregistration [ Upstream commit c2a04b02c060c4858762edce4674d5cba3e5a96f ] syzkaller found the following splat with CONFIG_DEBUG_KOBJECT_RELEASE=y: Read of size 1 at addr ffff000028e896b8 by task kworker/1:2/228 CPU: 1 PID: 228 Comm: kworker/1:2 Tainted: G S 5.9.0-rc8+ #101 Hardware name: linux,dummy-virt (DT) Workqueue: events kobject_delayed_cleanup Call trace: dump_backtrace+0x0/0x4d8 show_stack+0x34/0x48 dump_stack+0x174/0x1f8 print_address_description.constprop.0+0x5c/0x550 kasan_report+0x13c/0x1c0 __asan_report_load1_noabort+0x34/0x60 memcmp+0xd0/0xd8 gfs2_uevent+0xc4/0x188 kobject_uevent_env+0x54c/0x1240 kobject_uevent+0x2c/0x40 __kobject_del+0x190/0x1d8 kobject_delayed_cleanup+0x2bc/0x3b8 process_one_work+0x96c/0x18c0 worker_thread+0x3f0/0xc30 kthread+0x390/0x498 ret_from_fork+0x10/0x18 Allocated by task 1110: kasan_save_stack+0x28/0x58 __kasan_kmalloc.isra.0+0xc8/0xe8 kasan_kmalloc+0x10/0x20 kmem_cache_alloc_trace+0x1d8/0x2f0 alloc_super+0x64/0x8c0 sget_fc+0x110/0x620 get_tree_bdev+0x190/0x648 gfs2_get_tree+0x50/0x228 vfs_get_tree+0x84/0x2e8 path_mount+0x1134/0x1da8 do_mount+0x124/0x138 __arm64_sys_mount+0x164/0x238 el0_svc_common.constprop.0+0x15c/0x598 do_el0_svc+0x60/0x150 el0_svc+0x34/0xb0 el0_sync_handler+0xc8/0x5b4 el0_sync+0x15c/0x180 Freed by task 228: kasan_save_stack+0x28/0x58 kasan_set_track+0x28/0x40 kasan_set_free_info+0x24/0x48 __kasan_slab_free+0x118/0x190 kasan_slab_free+0x14/0x20 slab_free_freelist_hook+0x6c/0x210 kfree+0x13c/0x460 Use the same pattern as f2fs + ext4 where the kobject destruction must complete before allowing the FS itself to be freed. This means that we need an explicit free_sbd in the callers. Cc: Bob Peterson <rpeterso@redhat.com> Cc: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Jamie Iles <jamie@nuviainc.com> [Also go to fail_free when init_names fails.] Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-10-12 07:13:09 -06:00
complete(&sdp->sd_kobj_unregister);
}
static struct kobj_type gfs2_ktype = {
.release = gfs2_sbd_release,
.default_groups = gfs2_groups,
.sysfs_ops = &gfs2_attr_ops,
};
/*
* lock_module. Originally from lock_dlm
*/
static ssize_t proto_name_show(struct gfs2_sbd *sdp, char *buf)
{
const struct lm_lockops *ops = sdp->sd_lockstruct.ls_ops;
return sprintf(buf, "%s\n", ops->lm_proto_name);
}
static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
ssize_t ret;
int val = 0;
if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))
val = 1;
ret = sprintf(buf, "%d\n", val);
return ret;
}
static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
int ret, val;
ret = kstrtoint(buf, 0, &val);
if (ret)
return ret;
if (val == 1)
set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
else if (val == 0) {
clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
smp_mb__after_atomic();
gfs2_glock_thaw(sdp);
} else {
return -EINVAL;
}
return len;
}
static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf)
{
int val = completion_done(&sdp->sd_wdack) ? 1 : 0;
return sprintf(buf, "%d\n", val);
}
static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
int ret, val;
ret = kstrtoint(buf, 0, &val);
if (ret)
return ret;
if ((val == 1) &&
!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
complete(&sdp->sd_wdack);
else
return -EINVAL;
return len;
}
static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
return sprintf(buf, "%d\n", ls->ls_first);
}
static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
unsigned first;
int rv;
rv = sscanf(buf, "%u", &first);
if (rv != 1 || first > 1)
return -EINVAL;
rv = wait_for_completion_killable(&sdp->sd_locking_init);
if (rv)
return rv;
spin_lock(&sdp->sd_jindex_spin);
rv = -EBUSY;
if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
goto out;
rv = -EINVAL;
if (sdp->sd_args.ar_spectator)
goto out;
if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
goto out;
sdp->sd_lockstruct.ls_first = first;
rv = 0;
out:
spin_unlock(&sdp->sd_jindex_spin);
return rv ? rv : len;
}
static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags));
}
int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
{
struct gfs2_jdesc *jd;
int rv;
/* Wait for our primary journal to be initialized */
wait_for_completion(&sdp->sd_journal_ready);
spin_lock(&sdp->sd_jindex_spin);
rv = -EBUSY;
GFS2: Fix recovery issues for spectators This patch fixes a couple problems dealing with spectators who remain with gfs2 mounts after the last non-spectator node fails. Before this patch, spectator mounts would try to acquire the dlm's mounted lock EX as part of its normal recovery sequence. The mounted lock is only used to determine whether the node is the first mounter, the first node to mount the file system, for the purposes of file system recovery and journal replay. It's not necessary for spectators: they should never do journal recovery. If they acquire the lock it will prevent another "real" first-mounter from acquiring the lock in EX mode, which means it also cannot do journal recovery because it doesn't think it's the first node to mount the file system. This patch checks if the mounter is a spectator, and if so, avoids grabbing the mounted lock. This allows a secondary mounter who is really the first non-spectator mounter, to do journal recovery: since the spectator doesn't acquire the lock, it can grab it in EX mode, and therefore consider itself to be the first mounter both as a "real" first mount, and as a first-real-after-spectator. Note that the control lock still needs to be taken in PR mode in order to fetch the lvb value so it has the current status of all journal's recovery. This is used as it is today by a first mounter to replay the journals. For spectators, it's merely used to fetch the status bits. All recovery is bypassed and the node waits until recovery is completed by a non-spectator node. I also improved the cryptic message given by control_mount when a spectator is waiting for a non-spectator to perform recovery. It also fixes a problem in gfs2_recover_set whereby spectators were never queueing recovery work for their own journal. They cannot do recovery themselves, but they still need to queue the work so they can check the recovery bits and clear the DFL_BLOCK_LOCKS bit once the recovery happens on another node. When the work queue runs on a spectator, it bypasses most of the work so it won't print a bunch of annoying messages. All it will print is a bunch of messages that look like this until recovery completes on the non-spectator node: GFS2: fsid=mycluster:scratch.s: recover generation 3 jid 0 GFS2: fsid=mycluster:scratch.s: recover jid 0 result busy These continue every 1.5 seconds until the recovery is done by the non-spectator, at which time it says: GFS2: fsid=mycluster:scratch.s: recover generation 4 done Then it proceeds with its mount. If the file system is mounted in spectator node and the last remaining non-spectator is fenced, any IO to the file system is blocked by dlm and the spectator waits until recovery is performed by a non-spectator. If a spectator tries to mount the file system before any non-spectators, it blocks and repeatedly gives this kernel message: GFS2: fsid=mycluster:scratch: Recovery is required. Waiting for a non-spectator to mount. GFS2: fsid=mycluster:scratch: Recovery is required. Waiting for a non-spectator to mount. Signed-off-by: Bob Peterson <rpeterso@redhat.com> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
2018-07-05 13:40:46 -06:00
/**
* If we're a spectator, we use journal0, but it's not really ours.
* So we need to wait for its recovery too. If we skip it we'd never
* queue work to the recovery workqueue, and so its completion would
* never clear the DFL_BLOCK_LOCKS flag, so all our locks would
* permanently stop working.
*/
if (sdp->sd_jdesc->jd_jid == jid && !sdp->sd_args.ar_spectator)
goto out;
rv = -ENOENT;
list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
GFS2: Fix recovery issues for spectators This patch fixes a couple problems dealing with spectators who remain with gfs2 mounts after the last non-spectator node fails. Before this patch, spectator mounts would try to acquire the dlm's mounted lock EX as part of its normal recovery sequence. The mounted lock is only used to determine whether the node is the first mounter, the first node to mount the file system, for the purposes of file system recovery and journal replay. It's not necessary for spectators: they should never do journal recovery. If they acquire the lock it will prevent another "real" first-mounter from acquiring the lock in EX mode, which means it also cannot do journal recovery because it doesn't think it's the first node to mount the file system. This patch checks if the mounter is a spectator, and if so, avoids grabbing the mounted lock. This allows a secondary mounter who is really the first non-spectator mounter, to do journal recovery: since the spectator doesn't acquire the lock, it can grab it in EX mode, and therefore consider itself to be the first mounter both as a "real" first mount, and as a first-real-after-spectator. Note that the control lock still needs to be taken in PR mode in order to fetch the lvb value so it has the current status of all journal's recovery. This is used as it is today by a first mounter to replay the journals. For spectators, it's merely used to fetch the status bits. All recovery is bypassed and the node waits until recovery is completed by a non-spectator node. I also improved the cryptic message given by control_mount when a spectator is waiting for a non-spectator to perform recovery. It also fixes a problem in gfs2_recover_set whereby spectators were never queueing recovery work for their own journal. They cannot do recovery themselves, but they still need to queue the work so they can check the recovery bits and clear the DFL_BLOCK_LOCKS bit once the recovery happens on another node. When the work queue runs on a spectator, it bypasses most of the work so it won't print a bunch of annoying messages. All it will print is a bunch of messages that look like this until recovery completes on the non-spectator node: GFS2: fsid=mycluster:scratch.s: recover generation 3 jid 0 GFS2: fsid=mycluster:scratch.s: recover jid 0 result busy These continue every 1.5 seconds until the recovery is done by the non-spectator, at which time it says: GFS2: fsid=mycluster:scratch.s: recover generation 4 done Then it proceeds with its mount. If the file system is mounted in spectator node and the last remaining non-spectator is fenced, any IO to the file system is blocked by dlm and the spectator waits until recovery is performed by a non-spectator. If a spectator tries to mount the file system before any non-spectators, it blocks and repeatedly gives this kernel message: GFS2: fsid=mycluster:scratch: Recovery is required. Waiting for a non-spectator to mount. GFS2: fsid=mycluster:scratch: Recovery is required. Waiting for a non-spectator to mount. Signed-off-by: Bob Peterson <rpeterso@redhat.com> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
2018-07-05 13:40:46 -06:00
if (jd->jd_jid != jid && !sdp->sd_args.ar_spectator)
continue;
rv = gfs2_recover_journal(jd, false);
break;
}
out:
spin_unlock(&sdp->sd_jindex_spin);
return rv;
}
static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
unsigned jid;
int rv;
rv = sscanf(buf, "%u", &jid);
if (rv != 1)
return -EINVAL;
if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) {
rv = -ESHUTDOWN;
goto out;
}
rv = gfs2_recover_set(sdp, jid);
out:
return rv ? rv : len;
}
static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
return sprintf(buf, "%d\n", ls->ls_recover_jid_done);
}
static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
return sprintf(buf, "%d\n", ls->ls_recover_jid_status);
}
static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
{
return sprintf(buf, "%d\n", sdp->sd_lockstruct.ls_jid);
}
static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
int jid;
int rv;
rv = sscanf(buf, "%d", &jid);
if (rv != 1)
return -EINVAL;
rv = wait_for_completion_killable(&sdp->sd_locking_init);
if (rv)
return rv;
spin_lock(&sdp->sd_jindex_spin);
rv = -EINVAL;
if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
goto out;
rv = -EBUSY;
if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
goto out;
rv = 0;
if (sdp->sd_args.ar_spectator && jid > 0)
rv = jid = -EINVAL;
sdp->sd_lockstruct.ls_jid = jid;
clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
smp_mb__after_atomic();
wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
out:
spin_unlock(&sdp->sd_jindex_spin);
return rv ? rv : len;
}
#define GDLM_ATTR(_name,_mode,_show,_store) \
static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
GDLM_ATTR(block, 0644, block_show, block_store);
GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store);
GDLM_ATTR(jid, 0644, jid_show, jid_store);
GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store);
GDLM_ATTR(first_done, 0444, first_done_show, NULL);
GDLM_ATTR(recover, 0600, NULL, recover_store);
GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
static struct attribute *lock_module_attrs[] = {
&gdlm_attr_proto_name.attr,
&gdlm_attr_block.attr,
&gdlm_attr_withdraw.attr,
&gdlm_attr_jid.attr,
&gdlm_attr_first.attr,
&gdlm_attr_first_done.attr,
&gdlm_attr_recover.attr,
&gdlm_attr_recover_done.attr,
&gdlm_attr_recover_status.attr,
NULL,
};
/*
* get and set struct gfs2_tune fields
*/
static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%u %u\n",
sdp->sd_tune.gt_quota_scale_num,
sdp->sd_tune.gt_quota_scale_den);
}
static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
size_t len)
{
struct gfs2_tune *gt = &sdp->sd_tune;
unsigned int x, y;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (sscanf(buf, "%u %u", &x, &y) != 2 || !y)
return -EINVAL;
spin_lock(&gt->gt_spin);
gt->gt_quota_scale_num = x;
gt->gt_quota_scale_den = y;
spin_unlock(&gt->gt_spin);
return len;
}
static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
int check_zero, const char *buf, size_t len)
{
struct gfs2_tune *gt = &sdp->sd_tune;
unsigned int x;
int error;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
error = kstrtouint(buf, 0, &x);
if (error)
return error;
if (check_zero && !x)
return -EINVAL;
spin_lock(&gt->gt_spin);
*field = x;
spin_unlock(&gt->gt_spin);
return len;
}
#define TUNE_ATTR_3(name, show, store) \
static struct gfs2_attr tune_attr_##name = __ATTR(name, 0644, show, store)
#define TUNE_ATTR_2(name, store) \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
{ \
return snprintf(buf, PAGE_SIZE, "%u\n", sdp->sd_tune.gt_##name); \
} \
TUNE_ATTR_3(name, name##_show, store)
#define TUNE_ATTR(name, check_zero) \
static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
{ \
return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len); \
} \
TUNE_ATTR_2(name, name##_store)
TUNE_ATTR(quota_warn_period, 0);
TUNE_ATTR(quota_quantum, 0);
TUNE_ATTR(max_readahead, 0);
TUNE_ATTR(complain_secs, 0);
TUNE_ATTR(statfs_slow, 0);
TUNE_ATTR(new_files_jdata, 0);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
static struct attribute *tune_attrs[] = {
&tune_attr_quota_warn_period.attr,
&tune_attr_quota_quantum.attr,
&tune_attr_max_readahead.attr,
&tune_attr_complain_secs.attr,
&tune_attr_statfs_slow.attr,
&tune_attr_statfs_quantum.attr,
&tune_attr_quota_scale.attr,
&tune_attr_new_files_jdata.attr,
NULL,
};
static const struct attribute_group tune_group = {
.name = "tune",
.attrs = tune_attrs,
};
static const struct attribute_group lock_module_group = {
.name = "lock_module",
.attrs = lock_module_attrs,
};
int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
{
struct super_block *sb = sdp->sd_vfs;
int error;
char ro[20];
char spectator[20];
char *envp[] = { ro, spectator, NULL };
sprintf(ro, "RDONLY=%d", sb_rdonly(sb));
sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
gfs2: use-after-free in sysfs deregistration [ Upstream commit c2a04b02c060c4858762edce4674d5cba3e5a96f ] syzkaller found the following splat with CONFIG_DEBUG_KOBJECT_RELEASE=y: Read of size 1 at addr ffff000028e896b8 by task kworker/1:2/228 CPU: 1 PID: 228 Comm: kworker/1:2 Tainted: G S 5.9.0-rc8+ #101 Hardware name: linux,dummy-virt (DT) Workqueue: events kobject_delayed_cleanup Call trace: dump_backtrace+0x0/0x4d8 show_stack+0x34/0x48 dump_stack+0x174/0x1f8 print_address_description.constprop.0+0x5c/0x550 kasan_report+0x13c/0x1c0 __asan_report_load1_noabort+0x34/0x60 memcmp+0xd0/0xd8 gfs2_uevent+0xc4/0x188 kobject_uevent_env+0x54c/0x1240 kobject_uevent+0x2c/0x40 __kobject_del+0x190/0x1d8 kobject_delayed_cleanup+0x2bc/0x3b8 process_one_work+0x96c/0x18c0 worker_thread+0x3f0/0xc30 kthread+0x390/0x498 ret_from_fork+0x10/0x18 Allocated by task 1110: kasan_save_stack+0x28/0x58 __kasan_kmalloc.isra.0+0xc8/0xe8 kasan_kmalloc+0x10/0x20 kmem_cache_alloc_trace+0x1d8/0x2f0 alloc_super+0x64/0x8c0 sget_fc+0x110/0x620 get_tree_bdev+0x190/0x648 gfs2_get_tree+0x50/0x228 vfs_get_tree+0x84/0x2e8 path_mount+0x1134/0x1da8 do_mount+0x124/0x138 __arm64_sys_mount+0x164/0x238 el0_svc_common.constprop.0+0x15c/0x598 do_el0_svc+0x60/0x150 el0_svc+0x34/0xb0 el0_sync_handler+0xc8/0x5b4 el0_sync+0x15c/0x180 Freed by task 228: kasan_save_stack+0x28/0x58 kasan_set_track+0x28/0x40 kasan_set_free_info+0x24/0x48 __kasan_slab_free+0x118/0x190 kasan_slab_free+0x14/0x20 slab_free_freelist_hook+0x6c/0x210 kfree+0x13c/0x460 Use the same pattern as f2fs + ext4 where the kobject destruction must complete before allowing the FS itself to be freed. This means that we need an explicit free_sbd in the callers. Cc: Bob Peterson <rpeterso@redhat.com> Cc: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Jamie Iles <jamie@nuviainc.com> [Also go to fail_free when init_names fails.] Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-10-12 07:13:09 -06:00
init_completion(&sdp->sd_kobj_unregister);
sdp->sd_kobj.kset = gfs2_kset;
error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
"%s", sdp->sd_table_name);
if (error)
goto fail_reg;
error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
if (error)
goto fail_reg;
error = sysfs_create_group(&sdp->sd_kobj, &lock_module_group);
if (error)
goto fail_tune;
error = sysfs_create_link(&sdp->sd_kobj,
&disk_to_dev(sb->s_bdev->bd_disk)->kobj,
"device");
if (error)
goto fail_lock_module;
kobject_uevent_env(&sdp->sd_kobj, KOBJ_ADD, envp);
return 0;
fail_lock_module:
sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
fail_tune:
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
fail_reg:
fs_err(sdp, "error %d adding sysfs files\n", error);
kobject_put(&sdp->sd_kobj);
gfs2: use-after-free in sysfs deregistration [ Upstream commit c2a04b02c060c4858762edce4674d5cba3e5a96f ] syzkaller found the following splat with CONFIG_DEBUG_KOBJECT_RELEASE=y: Read of size 1 at addr ffff000028e896b8 by task kworker/1:2/228 CPU: 1 PID: 228 Comm: kworker/1:2 Tainted: G S 5.9.0-rc8+ #101 Hardware name: linux,dummy-virt (DT) Workqueue: events kobject_delayed_cleanup Call trace: dump_backtrace+0x0/0x4d8 show_stack+0x34/0x48 dump_stack+0x174/0x1f8 print_address_description.constprop.0+0x5c/0x550 kasan_report+0x13c/0x1c0 __asan_report_load1_noabort+0x34/0x60 memcmp+0xd0/0xd8 gfs2_uevent+0xc4/0x188 kobject_uevent_env+0x54c/0x1240 kobject_uevent+0x2c/0x40 __kobject_del+0x190/0x1d8 kobject_delayed_cleanup+0x2bc/0x3b8 process_one_work+0x96c/0x18c0 worker_thread+0x3f0/0xc30 kthread+0x390/0x498 ret_from_fork+0x10/0x18 Allocated by task 1110: kasan_save_stack+0x28/0x58 __kasan_kmalloc.isra.0+0xc8/0xe8 kasan_kmalloc+0x10/0x20 kmem_cache_alloc_trace+0x1d8/0x2f0 alloc_super+0x64/0x8c0 sget_fc+0x110/0x620 get_tree_bdev+0x190/0x648 gfs2_get_tree+0x50/0x228 vfs_get_tree+0x84/0x2e8 path_mount+0x1134/0x1da8 do_mount+0x124/0x138 __arm64_sys_mount+0x164/0x238 el0_svc_common.constprop.0+0x15c/0x598 do_el0_svc+0x60/0x150 el0_svc+0x34/0xb0 el0_sync_handler+0xc8/0x5b4 el0_sync+0x15c/0x180 Freed by task 228: kasan_save_stack+0x28/0x58 kasan_set_track+0x28/0x40 kasan_set_free_info+0x24/0x48 __kasan_slab_free+0x118/0x190 kasan_slab_free+0x14/0x20 slab_free_freelist_hook+0x6c/0x210 kfree+0x13c/0x460 Use the same pattern as f2fs + ext4 where the kobject destruction must complete before allowing the FS itself to be freed. This means that we need an explicit free_sbd in the callers. Cc: Bob Peterson <rpeterso@redhat.com> Cc: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Jamie Iles <jamie@nuviainc.com> [Also go to fail_free when init_names fails.] Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-10-12 07:13:09 -06:00
wait_for_completion(&sdp->sd_kobj_unregister);
sb->s_fs_info = NULL;
return error;
}
void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
{
sysfs_remove_link(&sdp->sd_kobj, "device");
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
kobject_put(&sdp->sd_kobj);
gfs2: use-after-free in sysfs deregistration [ Upstream commit c2a04b02c060c4858762edce4674d5cba3e5a96f ] syzkaller found the following splat with CONFIG_DEBUG_KOBJECT_RELEASE=y: Read of size 1 at addr ffff000028e896b8 by task kworker/1:2/228 CPU: 1 PID: 228 Comm: kworker/1:2 Tainted: G S 5.9.0-rc8+ #101 Hardware name: linux,dummy-virt (DT) Workqueue: events kobject_delayed_cleanup Call trace: dump_backtrace+0x0/0x4d8 show_stack+0x34/0x48 dump_stack+0x174/0x1f8 print_address_description.constprop.0+0x5c/0x550 kasan_report+0x13c/0x1c0 __asan_report_load1_noabort+0x34/0x60 memcmp+0xd0/0xd8 gfs2_uevent+0xc4/0x188 kobject_uevent_env+0x54c/0x1240 kobject_uevent+0x2c/0x40 __kobject_del+0x190/0x1d8 kobject_delayed_cleanup+0x2bc/0x3b8 process_one_work+0x96c/0x18c0 worker_thread+0x3f0/0xc30 kthread+0x390/0x498 ret_from_fork+0x10/0x18 Allocated by task 1110: kasan_save_stack+0x28/0x58 __kasan_kmalloc.isra.0+0xc8/0xe8 kasan_kmalloc+0x10/0x20 kmem_cache_alloc_trace+0x1d8/0x2f0 alloc_super+0x64/0x8c0 sget_fc+0x110/0x620 get_tree_bdev+0x190/0x648 gfs2_get_tree+0x50/0x228 vfs_get_tree+0x84/0x2e8 path_mount+0x1134/0x1da8 do_mount+0x124/0x138 __arm64_sys_mount+0x164/0x238 el0_svc_common.constprop.0+0x15c/0x598 do_el0_svc+0x60/0x150 el0_svc+0x34/0xb0 el0_sync_handler+0xc8/0x5b4 el0_sync+0x15c/0x180 Freed by task 228: kasan_save_stack+0x28/0x58 kasan_set_track+0x28/0x40 kasan_set_free_info+0x24/0x48 __kasan_slab_free+0x118/0x190 kasan_slab_free+0x14/0x20 slab_free_freelist_hook+0x6c/0x210 kfree+0x13c/0x460 Use the same pattern as f2fs + ext4 where the kobject destruction must complete before allowing the FS itself to be freed. This means that we need an explicit free_sbd in the callers. Cc: Bob Peterson <rpeterso@redhat.com> Cc: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Jamie Iles <jamie@nuviainc.com> [Also go to fail_free when init_names fails.] Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-10-12 07:13:09 -06:00
wait_for_completion(&sdp->sd_kobj_unregister);
}
static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
struct kobj_uevent_env *env)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
struct super_block *s = sdp->sd_vfs;
add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags))
add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid);
if (!uuid_is_null(&s->s_uuid))
add_uevent_var(env, "UUID=%pUB", &s->s_uuid);
return 0;
}
static const struct kset_uevent_ops gfs2_uevent_ops = {
.uevent = gfs2_uevent,
};
int gfs2_sys_init(void)
{
gfs2_kset = kset_create_and_add("gfs2", &gfs2_uevent_ops, fs_kobj);
if (!gfs2_kset)
return -ENOMEM;
return 0;
}
void gfs2_sys_uninit(void)
{
kset_unregister(gfs2_kset);
}