From b5799018827e67867fda77c3cde10fc080606fe2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 23 Sep 2015 12:44:17 -0400 Subject: [PATCH 01/26] ext4: move sysfs code from super.c to fs/ext4/sysfs.c Also statically allocate the ext4_kset and ext4_feat objects, since we only need exactly one of each, and it's simpler and less code if we drop the dynamic allocation and deallocation when it's not needed. Signed-off-by: Theodore Ts'o --- fs/ext4/Makefile | 2 +- fs/ext4/ext4.h | 5 + fs/ext4/super.c | 426 ++--------------------------------------------- fs/ext4/sysfs.c | 387 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 410 insertions(+), 410 deletions(-) create mode 100644 fs/ext4/sysfs.c diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 75285ea9aa05..f52cf54f0cbc 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -8,7 +8,7 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ mmp.o indirect.o extents_status.o xattr.o xattr_user.o \ - xattr_trusted.o inline.o readpage.o + xattr_trusted.o inline.o readpage.o sysfs.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index fd1f28be5296..76daccf38aa3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2903,6 +2903,11 @@ extern const struct inode_operations ext4_encrypted_symlink_inode_operations; extern const struct inode_operations ext4_symlink_inode_operations; extern const struct inode_operations ext4_fast_symlink_inode_operations; +/* sysfs.c */ +extern int ext4_register_sysfs(struct super_block *sb); +extern int __init ext4_init_sysfs(void); +extern void ext4_exit_sysfs(void); + /* block_validity */ extern void ext4_release_system_zone(struct super_block *sb); extern int ext4_setup_system_zone(struct super_block *sb); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a63c7b0a10cf..4a574fbeffec 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -55,10 +55,8 @@ #include static struct proc_dir_entry *ext4_proc_root; -static struct kset *ext4_kset; static struct ext4_lazy_init *ext4_li_info; static struct mutex ext4_li_mtx; -static struct ext4_features *ext4_feat; static int ext4_mballoc_ready; static struct ratelimit_state ext4_mount_msg_ratelimit; @@ -83,7 +81,6 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly); static void ext4_destroy_lazyinit_thread(void); static void ext4_unregister_li_request(struct super_block *sb); static void ext4_clear_request_list(void); -static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t); #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) static struct file_system_type ext2_fs_type = { @@ -2470,335 +2467,6 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) return ret; } -/* sysfs supprt */ - -struct ext4_attr { - struct attribute attr; - ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); - ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, - const char *, size_t); - union { - int offset; - int deprecated_val; - } u; -}; - -static int parse_strtoull(const char *buf, - unsigned long long max, unsigned long long *value) -{ - int ret; - - ret = kstrtoull(skip_spaces(buf), 0, value); - if (!ret && *value > max) - ret = -EINVAL; - return ret; -} - -static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%llu\n", - (s64) EXT4_C2B(sbi, - percpu_counter_sum(&sbi->s_dirtyclusters_counter))); -} - -static ssize_t session_write_kbytes_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - struct super_block *sb = sbi->s_buddy_cache->i_sb; - - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); - return snprintf(buf, PAGE_SIZE, "%lu\n", - (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - - sbi->s_sectors_written_start) >> 1); -} - -static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - struct super_block *sb = sbi->s_buddy_cache->i_sb; - - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)(sbi->s_kbytes_written + - ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - - EXT4_SB(sb)->s_sectors_written_start) >> 1))); -} - -static ssize_t inode_readahead_blks_store(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - unsigned long t; - int ret; - - ret = kstrtoul(skip_spaces(buf), 0, &t); - if (ret) - return ret; - - if (t && (!is_power_of_2(t) || t > 0x40000000)) - return -EINVAL; - - sbi->s_inode_readahead_blks = t; - return count; -} - -static ssize_t sbi_ui_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); - - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); -} - -static ssize_t sbi_ui_store(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); - unsigned long t; - int ret; - - ret = kstrtoul(skip_spaces(buf), 0, &t); - if (ret) - return ret; - *ui = t; - return count; -} - -static ssize_t es_ui_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - - unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) + - a->u.offset); - - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); -} - -static ssize_t reserved_clusters_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); -} - -static ssize_t reserved_clusters_store(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - unsigned long long val; - int ret; - - if (parse_strtoull(buf, -1ULL, &val)) - return -EINVAL; - ret = ext4_reserve_clusters(sbi, val); - - return ret ? ret : count; -} - -static ssize_t trigger_test_error(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - int len = count; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (len && buf[len-1] == '\n') - len--; - - if (len) - ext4_error(sbi->s_sb, "%.*s", len, buf); - return count; -} - -static ssize_t sbi_deprecated_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); -} - -#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ -static struct ext4_attr ext4_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .u = { \ - .offset = offsetof(struct ext4_sb_info, _elname),\ - }, \ -} - -#define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \ -static struct ext4_attr ext4_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .u = { \ - .offset = offsetof(struct ext4_super_block, _elname), \ - }, \ -} - -#define EXT4_ATTR(name, mode, show, store) \ -static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) - -#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) -#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) -#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) - -#define EXT4_RO_ATTR_ES_UI(name, elname) \ - EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname) -#define EXT4_RW_ATTR_SBI_UI(name, elname) \ - EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) - -#define ATTR_LIST(name) &ext4_attr_##name.attr -#define EXT4_DEPRECATED_ATTR(_name, _val) \ -static struct ext4_attr ext4_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = 0444 }, \ - .show = sbi_deprecated_show, \ - .u = { \ - .deprecated_val = _val, \ - }, \ -} - -EXT4_RO_ATTR(delayed_allocation_blocks); -EXT4_RO_ATTR(session_write_kbytes); -EXT4_RO_ATTR(lifetime_write_kbytes); -EXT4_RW_ATTR(reserved_clusters); -EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, - inode_readahead_blks_store, s_inode_readahead_blks); -EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); -EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); -EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); -EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); -EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); -EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); -EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); -EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); -EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); -EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); -EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); -EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); -EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); -EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); -EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); -EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); -EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); -EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); -EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); - -static struct attribute *ext4_attrs[] = { - ATTR_LIST(delayed_allocation_blocks), - ATTR_LIST(session_write_kbytes), - ATTR_LIST(lifetime_write_kbytes), - ATTR_LIST(reserved_clusters), - ATTR_LIST(inode_readahead_blks), - ATTR_LIST(inode_goal), - ATTR_LIST(mb_stats), - ATTR_LIST(mb_max_to_scan), - ATTR_LIST(mb_min_to_scan), - ATTR_LIST(mb_order2_req), - ATTR_LIST(mb_stream_req), - ATTR_LIST(mb_group_prealloc), - ATTR_LIST(max_writeback_mb_bump), - ATTR_LIST(extent_max_zeroout_kb), - ATTR_LIST(trigger_fs_error), - ATTR_LIST(err_ratelimit_interval_ms), - ATTR_LIST(err_ratelimit_burst), - ATTR_LIST(warning_ratelimit_interval_ms), - ATTR_LIST(warning_ratelimit_burst), - ATTR_LIST(msg_ratelimit_interval_ms), - ATTR_LIST(msg_ratelimit_burst), - ATTR_LIST(errors_count), - ATTR_LIST(first_error_time), - ATTR_LIST(last_error_time), - NULL, -}; - -/* Features this copy of ext4 supports */ -EXT4_INFO_ATTR(lazy_itable_init); -EXT4_INFO_ATTR(batched_discard); -EXT4_INFO_ATTR(meta_bg_resize); -EXT4_INFO_ATTR(encryption); - -static struct attribute *ext4_feat_attrs[] = { - ATTR_LIST(lazy_itable_init), - ATTR_LIST(batched_discard), - ATTR_LIST(meta_bg_resize), - ATTR_LIST(encryption), - NULL, -}; - -static ssize_t ext4_attr_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, - s_kobj); - struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); - - return a->show ? a->show(a, sbi, buf) : 0; -} - -static ssize_t ext4_attr_store(struct kobject *kobj, - struct attribute *attr, - const char *buf, size_t len) -{ - struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, - s_kobj); - struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); - - return a->store ? a->store(a, sbi, buf, len) : 0; -} - -static void ext4_sb_release(struct kobject *kobj) -{ - struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, - s_kobj); - complete(&sbi->s_kobj_unregister); -} - -static const struct sysfs_ops ext4_attr_ops = { - .show = ext4_attr_show, - .store = ext4_attr_store, -}; - -static struct kobj_type ext4_ktype = { - .default_attrs = ext4_attrs, - .sysfs_ops = &ext4_attr_ops, - .release = ext4_sb_release, -}; - -static void ext4_feat_release(struct kobject *kobj) -{ - complete(&ext4_feat->f_kobj_unregister); -} - -static ssize_t ext4_feat_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "supported\n"); -} - -/* - * We can not use ext4_attr_show/store because it relies on the kobject - * being embedded in the ext4_sb_info structure which is definitely not - * true in this case. - */ -static const struct sysfs_ops ext4_feat_ops = { - .show = ext4_feat_show, - .store = NULL, -}; - -static struct kobj_type ext4_feat_ktype = { - .default_attrs = ext4_feat_attrs, - .sysfs_ops = &ext4_feat_ops, - .release = ext4_feat_release, -}; - /* * Check whether this filesystem can be mounted based on * the features present and the RDONLY/RDWR mount requested. @@ -3403,10 +3071,10 @@ int ext4_calculate_overhead(struct super_block *sb) return 0; } - -static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) +static void ext4_set_resv_clusters(struct super_block *sb) { ext4_fsblk_t resv_clusters; + struct ext4_sb_info *sbi = EXT4_SB(sb); /* * There's no need to reserve anything when we aren't using extents. @@ -3415,7 +3083,7 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) * to keep ext2/3 backward compatibility. */ if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) - return 0; + return; /* * By default we reserve 2% or 4096 clusters, whichever is smaller. * This should cover the situations where we can not afford to run @@ -3424,26 +3092,13 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) * allocation would require 1, or 2 blocks, higher numbers are * very rare. */ - resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >> - EXT4_SB(sb)->s_cluster_bits; + resv_clusters = (ext4_blocks_count(sbi->s_es) >> + sbi->s_cluster_bits); do_div(resv_clusters, 50); resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); - return resv_clusters; -} - - -static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count) -{ - ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >> - sbi->s_cluster_bits; - - if (count >= clusters) - return -EINVAL; - - atomic64_set(&sbi->s_resv_clusters, count); - return 0; + atomic64_set(&sbi->s_resv_clusters, resv_clusters); } static int ext4_fill_super(struct super_block *sb, void *data, int silent) @@ -4192,12 +3847,7 @@ no_journal: "available"); } - err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb)); - if (err) { - ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " - "reserved pool", ext4_calculate_resv_clusters(sb)); - goto failed_mount4a; - } + ext4_set_resv_clusters(sb); err = ext4_setup_system_zone(sb); if (err) { @@ -4248,10 +3898,7 @@ no_journal: if (err) goto failed_mount6; - sbi->s_kobj.kset = ext4_kset; - init_completion(&sbi->s_kobj_unregister); - err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, - "%s", sb->s_id); + err = ext4_register_sysfs(sb); if (err) goto failed_mount7; @@ -5586,37 +5233,6 @@ static struct file_system_type ext4_fs_type = { }; MODULE_ALIAS_FS("ext4"); -static int __init ext4_init_feat_adverts(void) -{ - struct ext4_features *ef; - int ret = -ENOMEM; - - ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); - if (!ef) - goto out; - - ef->f_kobj.kset = ext4_kset; - init_completion(&ef->f_kobj_unregister); - ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, - "features"); - if (ret) { - kfree(ef); - goto out; - } - - ext4_feat = ef; - ret = 0; -out: - return ret; -} - -static void ext4_exit_feat_adverts(void) -{ - kobject_put(&ext4_feat->f_kobj); - wait_for_completion(&ext4_feat->f_kobj_unregister); - kfree(ext4_feat); -} - /* Shared across all ext4 file systems */ wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; @@ -5643,21 +5259,16 @@ static int __init ext4_init_fs(void) err = ext4_init_pageio(); if (err) - goto out7; + goto out5; err = ext4_init_system_zone(); if (err) - goto out6; - ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); - if (!ext4_kset) { - err = -ENOMEM; - goto out5; - } + goto out4; ext4_proc_root = proc_mkdir("fs/ext4", NULL); - err = ext4_init_feat_adverts(); + err = ext4_init_sysfs(); if (err) - goto out4; + goto out3; err = ext4_init_mballoc(); if (err) @@ -5682,16 +5293,14 @@ out1: ext4_mballoc_ready = 0; ext4_exit_mballoc(); out2: - ext4_exit_feat_adverts(); -out4: + ext4_exit_sysfs(); +out3: if (ext4_proc_root) remove_proc_entry("fs/ext4", NULL); - kset_unregister(ext4_kset); -out5: ext4_exit_system_zone(); -out6: +out4: ext4_exit_pageio(); -out7: +out5: ext4_exit_es(); return err; @@ -5706,9 +5315,8 @@ static void __exit ext4_exit_fs(void) unregister_filesystem(&ext4_fs_type); destroy_inodecache(); ext4_exit_mballoc(); - ext4_exit_feat_adverts(); + ext4_exit_sysfs(); remove_proc_entry("fs/ext4", NULL); - kset_unregister(ext4_kset); ext4_exit_system_zone(); ext4_exit_pageio(); ext4_exit_es(); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c new file mode 100644 index 000000000000..9fdc21b9b265 --- /dev/null +++ b/fs/ext4/sysfs.c @@ -0,0 +1,387 @@ +/* + * linux/fs/ext4/sysfs.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Theodore Ts'o (tytso@mit.edu) + * + */ + +#include +#include +#include +#include + +#include "ext4.h" +#include "ext4_jbd2.h" + +struct ext4_attr { + struct attribute attr; + ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); + ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, + const char *, size_t); + union { + int offset; + int deprecated_val; + } u; +}; + +static int parse_strtoull(const char *buf, + unsigned long long max, unsigned long long *value) +{ + int ret; + + ret = kstrtoull(skip_spaces(buf), 0, value); + if (!ret && *value > max) + ret = -EINVAL; + return ret; +} + +static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (s64) EXT4_C2B(sbi, + percpu_counter_sum(&sbi->s_dirtyclusters_counter))); +} + +static ssize_t session_write_kbytes_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + struct super_block *sb = sbi->s_buddy_cache->i_sb; + + if (!sb->s_bdev->bd_part) + return snprintf(buf, PAGE_SIZE, "0\n"); + return snprintf(buf, PAGE_SIZE, "%lu\n", + (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - + sbi->s_sectors_written_start) >> 1); +} + +static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + struct super_block *sb = sbi->s_buddy_cache->i_sb; + + if (!sb->s_bdev->bd_part) + return snprintf(buf, PAGE_SIZE, "0\n"); + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)(sbi->s_kbytes_written + + ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - + EXT4_SB(sb)->s_sectors_written_start) >> 1))); +} + +static ssize_t inode_readahead_blks_store(struct ext4_attr *a, + struct ext4_sb_info *sbi, + const char *buf, size_t count) +{ + unsigned long t; + int ret; + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret) + return ret; + + if (t && (!is_power_of_2(t) || t > 0x40000000)) + return -EINVAL; + + sbi->s_inode_readahead_blks = t; + return count; +} + +static ssize_t sbi_ui_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + +static ssize_t sbi_ui_store(struct ext4_attr *a, + struct ext4_sb_info *sbi, + const char *buf, size_t count) +{ + unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); + unsigned long t; + int ret; + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret) + return ret; + *ui = t; + return count; +} + +static ssize_t es_ui_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + + unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) + + a->u.offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + +static ssize_t reserved_clusters_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); +} + +static ssize_t reserved_clusters_store(struct ext4_attr *a, + struct ext4_sb_info *sbi, + const char *buf, size_t count) +{ + unsigned long long val; + ext4_fsblk_t clusters = (ext4_blocks_count(sbi->s_es) >> + sbi->s_cluster_bits); + + if (parse_strtoull(buf, -1ULL, &val)) + return -EINVAL; + + if (val >= clusters) + return -EINVAL; + + atomic64_set(&sbi->s_resv_clusters, val); + return count; +} + +static ssize_t trigger_test_error(struct ext4_attr *a, + struct ext4_sb_info *sbi, + const char *buf, size_t count) +{ + int len = count; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (len && buf[len-1] == '\n') + len--; + + if (len) + ext4_error(sbi->s_sb, "%.*s", len, buf); + return count; +} + +static ssize_t sbi_deprecated_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); +} + +#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ +static struct ext4_attr ext4_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .u = { \ + .offset = offsetof(struct ext4_sb_info, _elname),\ + }, \ +} + +#define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \ +static struct ext4_attr ext4_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .u = { \ + .offset = offsetof(struct ext4_super_block, _elname), \ + }, \ +} + +#define EXT4_ATTR(name, mode, show, store) \ +static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) + +#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) +#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) +#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) + +#define EXT4_RO_ATTR_ES_UI(name, elname) \ + EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname) +#define EXT4_RW_ATTR_SBI_UI(name, elname) \ + EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) + +#define ATTR_LIST(name) &ext4_attr_##name.attr +#define EXT4_DEPRECATED_ATTR(_name, _val) \ +static struct ext4_attr ext4_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = 0444 }, \ + .show = sbi_deprecated_show, \ + .u = { \ + .deprecated_val = _val, \ + }, \ +} + +EXT4_RO_ATTR(delayed_allocation_blocks); +EXT4_RO_ATTR(session_write_kbytes); +EXT4_RO_ATTR(lifetime_write_kbytes); +EXT4_RW_ATTR(reserved_clusters); +EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, + inode_readahead_blks_store, s_inode_readahead_blks); +EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); +EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); +EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); +EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); +EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); +EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); +EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); +EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); +EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); +EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); +EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); +EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); +EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); +EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); + +static struct attribute *ext4_attrs[] = { + ATTR_LIST(delayed_allocation_blocks), + ATTR_LIST(session_write_kbytes), + ATTR_LIST(lifetime_write_kbytes), + ATTR_LIST(reserved_clusters), + ATTR_LIST(inode_readahead_blks), + ATTR_LIST(inode_goal), + ATTR_LIST(mb_stats), + ATTR_LIST(mb_max_to_scan), + ATTR_LIST(mb_min_to_scan), + ATTR_LIST(mb_order2_req), + ATTR_LIST(mb_stream_req), + ATTR_LIST(mb_group_prealloc), + ATTR_LIST(max_writeback_mb_bump), + ATTR_LIST(extent_max_zeroout_kb), + ATTR_LIST(trigger_fs_error), + ATTR_LIST(err_ratelimit_interval_ms), + ATTR_LIST(err_ratelimit_burst), + ATTR_LIST(warning_ratelimit_interval_ms), + ATTR_LIST(warning_ratelimit_burst), + ATTR_LIST(msg_ratelimit_interval_ms), + ATTR_LIST(msg_ratelimit_burst), + ATTR_LIST(errors_count), + ATTR_LIST(first_error_time), + ATTR_LIST(last_error_time), + NULL, +}; + +/* Features this copy of ext4 supports */ +EXT4_INFO_ATTR(lazy_itable_init); +EXT4_INFO_ATTR(batched_discard); +EXT4_INFO_ATTR(meta_bg_resize); +EXT4_INFO_ATTR(encryption); + +static struct attribute *ext4_feat_attrs[] = { + ATTR_LIST(lazy_itable_init), + ATTR_LIST(batched_discard), + ATTR_LIST(meta_bg_resize), + ATTR_LIST(encryption), + NULL, +}; + +static ssize_t ext4_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, + s_kobj); + struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); + + return a->show ? a->show(a, sbi, buf) : 0; +} + +static ssize_t ext4_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, + s_kobj); + struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); + + return a->store ? a->store(a, sbi, buf, len) : 0; +} + +static void ext4_sb_release(struct kobject *kobj) +{ + struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + +static const struct sysfs_ops ext4_attr_ops = { + .show = ext4_attr_show, + .store = ext4_attr_store, +}; + +static struct kobj_type ext4_sb_ktype = { + .default_attrs = ext4_attrs, + .sysfs_ops = &ext4_attr_ops, + .release = ext4_sb_release, +}; + +static struct kobj_type ext4_ktype = { + .sysfs_ops = &ext4_attr_ops, +}; + +static struct kset ext4_kset = { + .kobj = {.ktype = &ext4_ktype}, +}; + +static ssize_t ext4_feat_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "supported\n"); +} + +/* + * We can not use ext4_attr_show/store because it relies on the kobject + * being embedded in the ext4_sb_info structure which is definitely not + * true in this case. + */ +static const struct sysfs_ops ext4_feat_ops = { + .show = ext4_feat_show, + .store = NULL, +}; + +static struct kobj_type ext4_feat_ktype = { + .default_attrs = ext4_feat_attrs, + .sysfs_ops = &ext4_feat_ops, +}; + +static struct kobject ext4_feat = { + .kset = &ext4_kset, +}; + +int ext4_register_sysfs(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + sbi->s_kobj.kset = &ext4_kset; + init_completion(&sbi->s_kobj_unregister); + return kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, NULL, + "%s", sb->s_id); +} + +int __init ext4_init_sysfs(void) +{ + int ret; + + kobject_set_name(&ext4_kset.kobj, "ext4"); + ext4_kset.kobj.parent = fs_kobj; + ret = kset_register(&ext4_kset); + if (ret) + return ret; + + ret = kobject_init_and_add(&ext4_feat, &ext4_feat_ktype, + NULL, "features"); + if (ret) + kset_unregister(&ext4_kset); + return ret; +} + +void ext4_exit_sysfs(void) +{ + kobject_put(&ext4_feat); + kset_unregister(&ext4_kset); +} + From 76d33bca5581b1dd5c3157fa168db849a784ada4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 23 Sep 2015 12:45:17 -0400 Subject: [PATCH 02/26] ext4: refactor sysfs support code Make the code more easily extensible as well as taking up less compiled space. Signed-off-by: Theodore Ts'o --- fs/ext4/sysfs.c | 267 ++++++++++++++++++++++++------------------------ 1 file changed, 131 insertions(+), 136 deletions(-) diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 9fdc21b9b265..87ac08964683 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -15,37 +15,35 @@ #include "ext4.h" #include "ext4_jbd2.h" +typedef enum { + attr_noop, + attr_delayed_allocation_blocks, + attr_session_write_kbytes, + attr_lifetime_write_kbytes, + attr_reserved_clusters, + attr_inode_readahead, + attr_trigger_test_error, + attr_feature, + attr_pointer_ui, + attr_pointer_atomic, +} attr_id_t; + +typedef enum { + ptr_explicit, + ptr_ext4_sb_info_offset, + ptr_ext4_super_block_offset, +} attr_ptr_t; + struct ext4_attr { struct attribute attr; - ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); - ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, - const char *, size_t); + short attr_id; + short attr_ptr; union { int offset; - int deprecated_val; + void *explicit_ptr; } u; }; -static int parse_strtoull(const char *buf, - unsigned long long max, unsigned long long *value) -{ - int ret; - - ret = kstrtoull(skip_spaces(buf), 0, value); - if (!ret && *value > max) - ret = -EINVAL; - return ret; -} - -static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%llu\n", - (s64) EXT4_C2B(sbi, - percpu_counter_sum(&sbi->s_dirtyclusters_counter))); -} - static ssize_t session_write_kbytes_show(struct ext4_attr *a, struct ext4_sb_info *sbi, char *buf) { @@ -89,46 +87,6 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, return count; } -static ssize_t sbi_ui_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); - - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); -} - -static ssize_t sbi_ui_store(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); - unsigned long t; - int ret; - - ret = kstrtoul(skip_spaces(buf), 0, &t); - if (ret) - return ret; - *ui = t; - return count; -} - -static ssize_t es_ui_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - - unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) + - a->u.offset); - - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); -} - -static ssize_t reserved_clusters_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); -} - static ssize_t reserved_clusters_store(struct ext4_attr *a, struct ext4_sb_info *sbi, const char *buf, size_t count) @@ -136,11 +94,10 @@ static ssize_t reserved_clusters_store(struct ext4_attr *a, unsigned long long val; ext4_fsblk_t clusters = (ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits); + int ret; - if (parse_strtoull(buf, -1ULL, &val)) - return -EINVAL; - - if (val >= clusters) + ret = kstrtoull(skip_spaces(buf), 0, &val); + if (!ret || val >= clusters) return -EINVAL; atomic64_set(&sbi->s_resv_clusters, val); @@ -164,60 +121,51 @@ static ssize_t trigger_test_error(struct ext4_attr *a, return count; } -static ssize_t sbi_deprecated_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); -} - -#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ -static struct ext4_attr ext4_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .u = { \ - .offset = offsetof(struct ext4_sb_info, _elname),\ - }, \ -} - -#define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \ +#define EXT4_ATTR(_name,_mode,_id) \ static struct ext4_attr ext4_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .u = { \ - .offset = offsetof(struct ext4_super_block, _elname), \ - }, \ + .attr_id = attr_##_id, \ } -#define EXT4_ATTR(name, mode, show, store) \ -static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) +#define EXT4_ATTR_FUNC(_name,_mode) EXT4_ATTR(_name,_mode,_name) -#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) -#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) -#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) +#define EXT4_ATTR_FEATURE(_name) EXT4_ATTR(_name, 0444, feature) -#define EXT4_RO_ATTR_ES_UI(name, elname) \ - EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname) -#define EXT4_RW_ATTR_SBI_UI(name, elname) \ - EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) - -#define ATTR_LIST(name) &ext4_attr_##name.attr -#define EXT4_DEPRECATED_ATTR(_name, _val) \ +#define EXT4_ATTR_OFFSET(_name,_mode,_id,_struct,_elname) \ static struct ext4_attr ext4_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = 0444 }, \ - .show = sbi_deprecated_show, \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr_id = attr_##_id, \ + .attr_ptr = ptr_##_struct##_offset, \ .u = { \ - .deprecated_val = _val, \ + .offset = offsetof(struct _struct, _elname),\ }, \ } -EXT4_RO_ATTR(delayed_allocation_blocks); -EXT4_RO_ATTR(session_write_kbytes); -EXT4_RO_ATTR(lifetime_write_kbytes); -EXT4_RW_ATTR(reserved_clusters); -EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, - inode_readahead_blks_store, s_inode_readahead_blks); +#define EXT4_RO_ATTR_ES_UI(_name,_elname) \ + EXT4_ATTR_OFFSET(_name, 0444, pointer_ui, ext4_super_block, _elname) + +#define EXT4_RW_ATTR_SBI_UI(_name,_elname) \ + EXT4_ATTR_OFFSET(_name, 0644, pointer_ui, ext4_sb_info, _elname) + +#define EXT4_ATTR_PTR(_name,_mode,_id,_ptr) \ +static struct ext4_attr ext4_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr_id = attr_##_id, \ + .attr_ptr = ptr_explicit, \ + .u = { \ + .explicit_ptr = _ptr, \ + }, \ +} + +#define ATTR_LIST(name) &ext4_attr_##name.attr + +EXT4_ATTR_FUNC(delayed_allocation_blocks, 0444); +EXT4_ATTR_FUNC(session_write_kbytes, 0444); +EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444); +EXT4_ATTR_FUNC(reserved_clusters, 0644); + +EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead, + ext4_sb_info, s_inode_readahead_blks); EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); @@ -225,9 +173,8 @@ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); -EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); -EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); +EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); @@ -238,6 +185,9 @@ EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); +static unsigned int old_bump_val = 128; +EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val); + static struct attribute *ext4_attrs[] = { ATTR_LIST(delayed_allocation_blocks), ATTR_LIST(session_write_kbytes), @@ -267,10 +217,10 @@ static struct attribute *ext4_attrs[] = { }; /* Features this copy of ext4 supports */ -EXT4_INFO_ATTR(lazy_itable_init); -EXT4_INFO_ATTR(batched_discard); -EXT4_INFO_ATTR(meta_bg_resize); -EXT4_INFO_ATTR(encryption); +EXT4_ATTR_FEATURE(lazy_itable_init); +EXT4_ATTR_FEATURE(batched_discard); +EXT4_ATTR_FEATURE(meta_bg_resize); +EXT4_ATTR_FEATURE(encryption); static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(lazy_itable_init), @@ -280,14 +230,56 @@ static struct attribute *ext4_feat_attrs[] = { NULL, }; +static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi) +{ + switch (a->attr_ptr) { + case ptr_explicit: + return a->u.explicit_ptr; + case ptr_ext4_sb_info_offset: + return (void *) (((char *) sbi) + a->u.offset); + case ptr_ext4_super_block_offset: + return (void *) (((char *) sbi->s_es) + a->u.offset); + } + return NULL; +} + static ssize_t ext4_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, s_kobj); struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); + void *ptr = calc_ptr(a, sbi); - return a->show ? a->show(a, sbi, buf) : 0; + switch (a->attr_id) { + case attr_delayed_allocation_blocks: + return snprintf(buf, PAGE_SIZE, "%llu\n", + (s64) EXT4_C2B(sbi, + percpu_counter_sum(&sbi->s_dirtyclusters_counter))); + case attr_session_write_kbytes: + return session_write_kbytes_show(a, sbi, buf); + case attr_lifetime_write_kbytes: + return lifetime_write_kbytes_show(a, sbi, buf); + case attr_reserved_clusters: + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long) + atomic64_read(&sbi->s_resv_clusters)); + case attr_inode_readahead: + case attr_pointer_ui: + if (!ptr) + return 0; + return snprintf(buf, PAGE_SIZE, "%u\n", + *((unsigned int *) ptr)); + case attr_pointer_atomic: + if (!ptr) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", + atomic_read((atomic_t *) ptr)); + case attr_feature: + return snprintf(buf, PAGE_SIZE, "supported\n"); + } + + return 0; } static ssize_t ext4_attr_store(struct kobject *kobj, @@ -297,8 +289,27 @@ static ssize_t ext4_attr_store(struct kobject *kobj, struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, s_kobj); struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); + void *ptr = calc_ptr(a, sbi); + unsigned long t; + int ret; - return a->store ? a->store(a, sbi, buf, len) : 0; + switch (a->attr_id) { + case attr_reserved_clusters: + return reserved_clusters_store(a, sbi, buf, len); + case attr_pointer_ui: + if (!ptr) + return 0; + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret) + return ret; + *((unsigned int *) ptr) = t; + return len; + case attr_inode_readahead: + return inode_readahead_blks_store(a, sbi, buf, len); + case attr_trigger_test_error: + return trigger_test_error(a, sbi, buf, len); + } + return 0; } static void ext4_sb_release(struct kobject *kobj) @@ -327,25 +338,9 @@ static struct kset ext4_kset = { .kobj = {.ktype = &ext4_ktype}, }; -static ssize_t ext4_feat_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "supported\n"); -} - -/* - * We can not use ext4_attr_show/store because it relies on the kobject - * being embedded in the ext4_sb_info structure which is definitely not - * true in this case. - */ -static const struct sysfs_ops ext4_feat_ops = { - .show = ext4_feat_show, - .store = NULL, -}; - static struct kobj_type ext4_feat_ktype = { .default_attrs = ext4_feat_attrs, - .sysfs_ops = &ext4_feat_ops, + .sysfs_ops = &ext4_attr_ops, }; static struct kobject ext4_feat = { From ebd173beb8db5b8b315fa1c5bbac86c54059397a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 23 Sep 2015 12:46:17 -0400 Subject: [PATCH 03/26] ext4: move procfs registration code to fs/ext4/sysfs.c This allows us to refactor the procfs code, which saves a bit of compiled space. More importantly it isolates most of the procfs support code into a single file, so it's easier to #ifdef it out if the proc file system has been disabled. Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 3 ++ fs/ext4/extents_status.c | 60 ++--------------------------------- fs/ext4/extents_status.h | 2 ++ fs/ext4/mballoc.c | 9 +----- fs/ext4/super.c | 41 ++---------------------- fs/ext4/sysfs.c | 68 ++++++++++++++++++++++++++++++++++++++-- 6 files changed, 77 insertions(+), 106 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 76daccf38aa3..766b7f7b782b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2245,6 +2245,7 @@ extern int ext4_init_inode_table(struct super_block *sb, extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); /* mballoc.c */ +extern const struct file_operations ext4_seq_mb_groups_fops; extern long ext4_mb_stats; extern long ext4_mb_max_to_scan; extern int ext4_mb_init(struct super_block *); @@ -2372,6 +2373,7 @@ extern int ext4_group_extend(struct super_block *sb, extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ +extern int ext4_seq_options_show(struct seq_file *seq, void *offset); extern int ext4_calculate_overhead(struct super_block *sb); extern void ext4_superblock_csum_set(struct super_block *sb); extern void *ext4_kvmalloc(size_t size, gfp_t flags); @@ -2905,6 +2907,7 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; /* sysfs.c */ extern int ext4_register_sysfs(struct super_block *sb); +extern void ext4_unregister_sysfs(struct super_block *sb); extern int __init ext4_init_sysfs(void); extern void ext4_exit_sysfs(void); diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 26724aeece73..ac748b3af1c1 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -1089,20 +1089,9 @@ static unsigned long ext4_es_scan(struct shrinker *shrink, return nr_shrunk; } -static void *ext4_es_seq_shrinker_info_start(struct seq_file *seq, loff_t *pos) +int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v) { - return *pos ? NULL : SEQ_START_TOKEN; -} - -static void * -ext4_es_seq_shrinker_info_next(struct seq_file *seq, void *v, loff_t *pos) -{ - return NULL; -} - -static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) -{ - struct ext4_sb_info *sbi = seq->private; + struct ext4_sb_info *sbi = EXT4_SB((struct super_block *) seq->private); struct ext4_es_stats *es_stats = &sbi->s_es_stats; struct ext4_inode_info *ei, *max = NULL; unsigned int inode_cnt = 0; @@ -1143,45 +1132,6 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) return 0; } -static void ext4_es_seq_shrinker_info_stop(struct seq_file *seq, void *v) -{ -} - -static const struct seq_operations ext4_es_seq_shrinker_info_ops = { - .start = ext4_es_seq_shrinker_info_start, - .next = ext4_es_seq_shrinker_info_next, - .stop = ext4_es_seq_shrinker_info_stop, - .show = ext4_es_seq_shrinker_info_show, -}; - -static int -ext4_es_seq_shrinker_info_open(struct inode *inode, struct file *file) -{ - int ret; - - ret = seq_open(file, &ext4_es_seq_shrinker_info_ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = PDE_DATA(inode); - } - - return ret; -} - -static int -ext4_es_seq_shrinker_info_release(struct inode *inode, struct file *file) -{ - return seq_release(inode, file); -} - -static const struct file_operations ext4_es_seq_shrinker_info_fops = { - .owner = THIS_MODULE, - .open = ext4_es_seq_shrinker_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = ext4_es_seq_shrinker_info_release, -}; - int ext4_es_register_shrinker(struct ext4_sb_info *sbi) { int err; @@ -1210,10 +1160,6 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) if (err) goto err2; - if (sbi->s_proc) - proc_create_data("es_shrinker_info", S_IRUGO, sbi->s_proc, - &ext4_es_seq_shrinker_info_fops, sbi); - return 0; err2: @@ -1225,8 +1171,6 @@ err1: void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) { - if (sbi->s_proc) - remove_proc_entry("es_shrinker_info", sbi->s_proc); percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); unregister_shrinker(&sbi->s_es_shrinker); diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index 691b52613ce4..f7aa24f4642d 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -172,4 +172,6 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es, extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); +extern int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v); + #endif /* _EXT4_EXTENTS_STATUS_H */ diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 34b610ea5030..b0f7ee57630b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2333,7 +2333,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) } -static const struct file_operations ext4_mb_seq_groups_fops = { +const struct file_operations ext4_seq_mb_groups_fops = { .owner = THIS_MODULE, .open = ext4_mb_seq_groups_open, .read = seq_read, @@ -2661,10 +2661,6 @@ int ext4_mb_init(struct super_block *sb) if (ret != 0) goto out_free_locality_groups; - if (sbi->s_proc) - proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, - &ext4_mb_seq_groups_fops, sb); - return 0; out_free_locality_groups: @@ -2705,9 +2701,6 @@ int ext4_mb_release(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); - if (sbi->s_proc) - remove_proc_entry("mb_groups", sbi->s_proc); - if (sbi->s_group_info) { for (i = 0; i < ngroups; i++) { grinfo = ext4_get_group_info(sb, i); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4a574fbeffec..7ef3fa58d3fe 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -54,7 +53,6 @@ #define CREATE_TRACE_POINTS #include -static struct proc_dir_entry *ext4_proc_root; static struct ext4_lazy_init *ext4_li_info; static struct mutex ext4_li_mtx; static int ext4_mballoc_ready; @@ -797,6 +795,7 @@ static void ext4_put_super(struct super_block *sb) ext4_abort(sb, "Couldn't clean up the journal"); } + ext4_unregister_sysfs(sb); ext4_es_unregister_shrinker(sbi); del_timer_sync(&sbi->s_err_report); ext4_release_system_zone(sb); @@ -811,12 +810,6 @@ static void ext4_put_super(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) ext4_commit_super(sb, 1); - if (sbi->s_proc) { - remove_proc_entry("options", sbi->s_proc); - remove_proc_entry(sb->s_id, ext4_proc_root); - } - kobject_del(&sbi->s_kobj); - for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); kvfree(sbi->s_group_desc); @@ -1877,7 +1870,7 @@ static int ext4_show_options(struct seq_file *seq, struct dentry *root) return _ext4_show_options(seq, root->d_sb, 0); } -static int options_seq_show(struct seq_file *seq, void *offset) +int ext4_seq_options_show(struct seq_file *seq, void *offset) { struct super_block *sb = seq->private; int rc; @@ -1888,19 +1881,6 @@ static int options_seq_show(struct seq_file *seq, void *offset) return rc; } -static int options_open_fs(struct inode *inode, struct file *file) -{ - return single_open(file, options_seq_show, PDE_DATA(inode)); -} - -static const struct file_operations ext4_seq_options_fops = { - .owner = THIS_MODULE, - .open = options_open_fs, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int read_only) { @@ -3616,13 +3596,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (ext4_proc_root) - sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); - - if (sbi->s_proc) - proc_create_data("options", S_IRUGO, sbi->s_proc, - &ext4_seq_options_fops, sb); - bgl_lock_init(sbi->s_blockgroup_lock); for (i = 0; i < db_count; i++) { @@ -3960,7 +3933,7 @@ cantfind_ext4: #ifdef CONFIG_QUOTA failed_mount8: - kobject_del(&sbi->s_kobj); + ext4_unregister_sysfs(sb); #endif failed_mount7: ext4_unregister_li_request(sb); @@ -4000,10 +3973,6 @@ failed_mount2: failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - if (sbi->s_proc) { - remove_proc_entry("options", sbi->s_proc); - remove_proc_entry(sb->s_id, ext4_proc_root); - } #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(sbi->s_qf_names[i]); @@ -5264,7 +5233,6 @@ static int __init ext4_init_fs(void) err = ext4_init_system_zone(); if (err) goto out4; - ext4_proc_root = proc_mkdir("fs/ext4", NULL); err = ext4_init_sysfs(); if (err) @@ -5295,8 +5263,6 @@ out1: out2: ext4_exit_sysfs(); out3: - if (ext4_proc_root) - remove_proc_entry("fs/ext4", NULL); ext4_exit_system_zone(); out4: ext4_exit_pageio(); @@ -5316,7 +5282,6 @@ static void __exit ext4_exit_fs(void) destroy_inodecache(); ext4_exit_mballoc(); ext4_exit_sysfs(); - remove_proc_entry("fs/ext4", NULL); ext4_exit_system_zone(); ext4_exit_pageio(); ext4_exit_es(); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 87ac08964683..62bef0f06421 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -34,6 +34,9 @@ typedef enum { ptr_ext4_super_block_offset, } attr_ptr_t; +static const char *proc_dirname = "fs/ext4"; +static struct proc_dir_entry *ext4_proc_root; + struct ext4_attr { struct attribute attr; short attr_id; @@ -347,14 +350,71 @@ static struct kobject ext4_feat = { .kset = &ext4_kset, }; +#define PROC_FILE_SHOW_DEFN(name) \ +static int name##_open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, ext4_seq_##name##_show, PDE_DATA(inode)); \ +} \ +\ +const struct file_operations ext4_seq_##name##_fops = { \ + .owner = THIS_MODULE, \ + .open = name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + +#define PROC_FILE_LIST(name) \ + { __stringify(name), &ext4_seq_##name##_fops } + +PROC_FILE_SHOW_DEFN(es_shrinker_info); +PROC_FILE_SHOW_DEFN(options); + +static struct ext4_proc_files { + const char *name; + const struct file_operations *fops; +} proc_files[] = { + PROC_FILE_LIST(options), + PROC_FILE_LIST(es_shrinker_info), + PROC_FILE_LIST(mb_groups), + { NULL, NULL }, +}; + int ext4_register_sysfs(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_proc_files *p; + int err; sbi->s_kobj.kset = &ext4_kset; init_completion(&sbi->s_kobj_unregister); - return kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, NULL, - "%s", sb->s_id); + err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, NULL, + "%s", sb->s_id); + if (err) + return err; + + if (ext4_proc_root) + sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); + + if (sbi->s_proc) { + for (p = proc_files; p->name; p++) + proc_create_data(p->name, S_IRUGO, sbi->s_proc, + p->fops, sb); + } + return 0; +} + +void ext4_unregister_sysfs(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_proc_files *p; + + if (sbi->s_proc) { + for (p = proc_files; p->name; p++) + remove_proc_entry(p->name, sbi->s_proc); + remove_proc_entry(sb->s_id, ext4_proc_root); + } + kobject_del(&sbi->s_kobj); } int __init ext4_init_sysfs(void) @@ -371,6 +431,8 @@ int __init ext4_init_sysfs(void) NULL, "features"); if (ret) kset_unregister(&ext4_kset); + else + ext4_proc_root = proc_mkdir(proc_dirname, NULL); return ret; } @@ -378,5 +440,7 @@ void ext4_exit_sysfs(void) { kobject_put(&ext4_feat); kset_unregister(&ext4_kset); + remove_proc_entry(proc_dirname, NULL); + ext4_proc_root = NULL; } From 937d7b84dca58f2565715f2c8e52f14c3d65fb22 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 2 Oct 2015 23:54:58 -0400 Subject: [PATCH 04/26] ext4 crypto: fix memory leak in ext4_bio_write_page() There are times when ext4_bio_write_page() is called even though we don't actually need to do any I/O. This happens when ext4_writepage() gets called by the jbd2 commit path when an inode needs to force its pages written out in order to provide data=ordered guarantees --- and a page is backed by an unwritten (e.g., uninitialized) block on disk, or if delayed allocation means the page's backing store hasn't been allocated yet. In that case, we need to skip the call to ext4_encrypt_page(), since in addition to wasting CPU, it leads to a bounce page and an ext4 crypto context getting leaked. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/page-io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 84ba4d2b3a35..17fbe3882b8e 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -425,6 +425,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, struct buffer_head *bh, *head; int ret = 0; int nr_submitted = 0; + int nr_to_submit = 0; blocksize = 1 << inode->i_blkbits; @@ -477,11 +478,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io, unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); } set_buffer_async_write(bh); + nr_to_submit++; } while ((bh = bh->b_this_page) != head); bh = head = page_buffers(page); - if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) && + nr_to_submit) { data_page = ext4_encrypt(inode, page); if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); From cccd147a57e47527ce9c27fcd5f0a1c5669d6870 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Oct 2015 10:49:23 -0400 Subject: [PATCH 05/26] ext4: optimize ext4_writepage() for attempted 4k delalloc writes In cases where the file system block size is the same as the page size, and ext4_writepage() is asked to write out a page which is either has the unwritten bit set in the extent tree, or which does not yet have a block assigned due to delayed allocation, we can bail out early and, unlocking the page earlier and avoiding a round trip through ext4_bio_write_page() with the attendant calls to set_page_writeback() and redirty_page_for_writeback(). Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 612fbcf76b5c..c95d40658979 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1815,11 +1815,22 @@ static int ext4_writepage(struct page *page, * the page. But we may reach here when we do a journal commit via * journal_submit_inode_data_buffers() and in that case we must write * allocated buffers to achieve data=ordered mode guarantees. + * + * Also, if there is only one buffer per page (the fs block + * size == the page size), if one buffer needs block + * allocation or needs to modify the extent tree to clear the + * unwritten flag, we know that the page can't be written at + * all, so we might as well refuse the write immediately. + * Unfortunately if the block size != page size, we can't as + * easily detect this case using ext4_walk_page_buffers(), but + * for the extremely common case, this is an optimization that + * skips a useless round trip through ext4_bio_write_page(). */ if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL, ext4_bh_delay_or_unwritten)) { redirty_page_for_writepage(wbc, page); - if (current->flags & PF_MEMALLOC) { + if ((current->flags & PF_MEMALLOC) || + (inode->i_sb->s_blocksize == PAGE_CACHE_SIZE)) { /* * For memory cleaning there's no point in writing only * some buffers. So just bail out. Warn if we came here From 3684de8ca2b82180f2cdcf5fbcfd3d29ef0afaf8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Oct 2015 10:49:26 -0400 Subject: [PATCH 06/26] ext4 crypto: ext4_page_crypto() doesn't need a encryption context Since ext4_page_crypto() doesn't need an encryption context (at least not any more), this allows us to simplify a number function signature and also allows us to avoid needing to allocate a context in ext4_block_write_begin(). It also means we no longer need a separate ext4_decrypt_one() function. Signed-off-by: Theodore Ts'o --- fs/ext4/crypto.c | 28 +++++----------------------- fs/ext4/ext4.h | 3 +-- fs/ext4/inode.c | 4 ++-- fs/ext4/readpage.c | 2 +- 4 files changed, 9 insertions(+), 28 deletions(-) diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 45731558138c..3a5a7a2597de 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -253,8 +253,7 @@ typedef enum { EXT4_ENCRYPT, } ext4_direction_t; -static int ext4_page_crypto(struct ext4_crypto_ctx *ctx, - struct inode *inode, +static int ext4_page_crypto(struct inode *inode, ext4_direction_t rw, pgoff_t index, struct page *src_page, @@ -353,7 +352,7 @@ struct page *ext4_encrypt(struct inode *inode, if (IS_ERR(ciphertext_page)) goto errout; ctx->w.control_page = plaintext_page; - err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, plaintext_page->index, + err = ext4_page_crypto(inode, EXT4_ENCRYPT, plaintext_page->index, plaintext_page, ciphertext_page); if (err) { ciphertext_page = ERR_PTR(err); @@ -378,31 +377,14 @@ struct page *ext4_encrypt(struct inode *inode, * * Return: Zero on success, non-zero otherwise. */ -int ext4_decrypt(struct ext4_crypto_ctx *ctx, struct page *page) +int ext4_decrypt(struct page *page) { BUG_ON(!PageLocked(page)); - return ext4_page_crypto(ctx, page->mapping->host, + return ext4_page_crypto(page->mapping->host, EXT4_DECRYPT, page->index, page, page); } -/* - * Convenience function which takes care of allocating and - * deallocating the encryption context - */ -int ext4_decrypt_one(struct inode *inode, struct page *page) -{ - int ret; - - struct ext4_crypto_ctx *ctx = ext4_get_crypto_ctx(inode); - - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - ret = ext4_decrypt(ctx, page); - ext4_release_crypto_ctx(ctx); - return ret; -} - int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) { struct ext4_crypto_ctx *ctx; @@ -426,7 +408,7 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) } while (len--) { - err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, lblk, + err = ext4_page_crypto(inode, EXT4_ENCRYPT, lblk, ZERO_PAGE(0), ciphertext_page); if (err) goto errout; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 766b7f7b782b..3f248c9f899f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2063,8 +2063,7 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx); void ext4_restore_control_page(struct page *data_page); struct page *ext4_encrypt(struct inode *inode, struct page *plaintext_page); -int ext4_decrypt(struct ext4_crypto_ctx *ctx, struct page *page); -int ext4_decrypt_one(struct inode *inode, struct page *page); +int ext4_decrypt(struct page *page); int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex); #ifdef CONFIG_EXT4_FS_ENCRYPTION diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c95d40658979..ae52e329e986 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -965,7 +965,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, if (unlikely(err)) page_zero_new_buffers(page, from, to); else if (decrypt) - err = ext4_decrypt_one(inode, page); + err = ext4_decrypt(page); return err; } #endif @@ -3404,7 +3404,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, /* We expect the key to be set. */ BUG_ON(!ext4_has_encryption_key(inode)); BUG_ON(blocksize != PAGE_CACHE_SIZE); - WARN_ON_ONCE(ext4_decrypt_one(inode, page)); + WARN_ON_ONCE(ext4_decrypt(page)); } } if (ext4_should_journal_data(inode)) { diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index e26803fb210d..a91e4bf6ebc4 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -62,7 +62,7 @@ static void completion_pages(struct work_struct *work) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - int ret = ext4_decrypt(ctx, page); + int ret = ext4_decrypt(page); if (ret) { WARN_ON_ONCE(1); SetPageError(page); From 687c3c36e754a999a8263745b27965128db4fee5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Oct 2015 10:49:27 -0400 Subject: [PATCH 07/26] ext4 crypto: replace some BUG_ON()'s with error checks Buggy (or hostile) userspace should not be able to cause the kernel to crash. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/crypto.c | 1 - fs/ext4/crypto_fname.c | 2 -- fs/ext4/crypto_key.c | 16 +++++++++++++--- fs/ext4/crypto_policy.c | 3 ++- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 3a5a7a2597de..879cb15b7a21 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -295,7 +295,6 @@ static int ext4_page_crypto(struct inode *inode, else res = crypto_ablkcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); wait_for_completion(&ecr.completion); res = ecr.res; } diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c index 847f919c84d9..2fbef8a14760 100644 --- a/fs/ext4/crypto_fname.c +++ b/fs/ext4/crypto_fname.c @@ -120,7 +120,6 @@ static int ext4_fname_encrypt(struct inode *inode, ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); res = crypto_ablkcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); wait_for_completion(&ecr.completion); res = ecr.res; } @@ -182,7 +181,6 @@ static int ext4_fname_decrypt(struct inode *inode, ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); res = crypto_ablkcipher_decrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); wait_for_completion(&ecr.completion); res = ecr.res; } diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c index 1d510c11b100..f9270ec2a132 100644 --- a/fs/ext4/crypto_key.c +++ b/fs/ext4/crypto_key.c @@ -71,7 +71,6 @@ static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE], EXT4_AES_256_XTS_KEY_SIZE, NULL); res = crypto_ablkcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); wait_for_completion(&ecr.completion); res = ecr.res; } @@ -208,7 +207,12 @@ retry: goto out; } crypt_info->ci_keyring_key = keyring_key; - BUG_ON(keyring_key->type != &key_type_logon); + if (keyring_key->type != &key_type_logon) { + printk_once(KERN_WARNING + "ext4: key type must be logon\n"); + res = -ENOKEY; + goto out; + } ukp = ((struct user_key_payload *)keyring_key->payload.data); if (ukp->datalen != sizeof(struct ext4_encryption_key)) { res = -EINVAL; @@ -217,7 +221,13 @@ retry: master_key = (struct ext4_encryption_key *)ukp->data; BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE != EXT4_KEY_DERIVATION_NONCE_SIZE); - BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE); + if (master_key->size != EXT4_AES_256_XTS_KEY_SIZE) { + printk_once(KERN_WARNING + "ext4: key size incorrect: %d\n", + master_key->size); + res = -ENOKEY; + goto out; + } res = ext4_derive_key_aes(ctx.nonce, master_key->raw, raw_key); if (res) diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c index a640ec2c4b13..ad050698143f 100644 --- a/fs/ext4/crypto_policy.c +++ b/fs/ext4/crypto_policy.c @@ -150,7 +150,8 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent, if ((parent == NULL) || (child == NULL)) { pr_err("parent %p child %p\n", parent, child); - BUG_ON(1); + WARN_ON(1); /* Should never happen */ + return 0; } /* no restrictions if the parent directory is not encrypted */ if (!ext4_encrypted_inode(parent)) From 36086d43f6575c081067de9855786a2fc91df77b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Oct 2015 10:49:29 -0400 Subject: [PATCH 08/26] ext4 crypto: fix bugs in ext4_encrypted_zeroout() Fix multiple bugs in ext4_encrypted_zeroout(), including one that could cause us to write an encrypted zero page to the wrong location on disk, potentially causing data and file system corruption. Fortunately, this tends to only show up in stress tests, but even with these fixes, we are seeing some test failures with generic/127 --- but these are now caused by data failures instead of metadata corruption. Since ext4_encrypted_zeroout() is only used for some optimizations to keep the extent tree from being too fragmented, and ext4_encrypted_zeroout() itself isn't all that optimized from a time or IOPS perspective, disable the extent tree optimization for encrypted inodes for now. This prevents the data corruption issues reported by generic/127 until we can figure out what's going wrong. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/crypto.c | 23 +++++++++++++++++++---- fs/ext4/extents.c | 3 +++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 879cb15b7a21..af06830bfc00 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -392,7 +392,13 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) ext4_lblk_t lblk = ex->ee_block; ext4_fsblk_t pblk = ext4_ext_pblock(ex); unsigned int len = ext4_ext_get_actual_len(ex); - int err = 0; + int ret, err = 0; + +#if 0 + ext4_msg(inode->i_sb, KERN_CRIT, + "ext4_encrypted_zeroout ino %lu lblk %u len %u", + (unsigned long) inode->i_ino, lblk, len); +#endif BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE); @@ -418,17 +424,26 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) goto errout; } bio->bi_bdev = inode->i_sb->s_bdev; - bio->bi_iter.bi_sector = pblk; - err = bio_add_page(bio, ciphertext_page, + bio->bi_iter.bi_sector = + pblk << (inode->i_sb->s_blocksize_bits - 9); + ret = bio_add_page(bio, ciphertext_page, inode->i_sb->s_blocksize, 0); - if (err) { + if (ret != inode->i_sb->s_blocksize) { + /* should never happen! */ + ext4_msg(inode->i_sb, KERN_ERR, + "bio_add_page failed: %d", ret); + WARN_ON(1); bio_put(bio); + err = -EIO; goto errout; } err = submit_bio_wait(WRITE, bio); + if ((err == 0) && bio->bi_error) + err = -EIO; bio_put(bio); if (err) goto errout; + lblk++; pblk++; } err = 0; errout: diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2553aa8b608d..7f486e350d15 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3558,6 +3558,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, max_zeroout = sbi->s_extent_max_zeroout_kb >> (inode->i_sb->s_blocksize_bits - 10); + if (ext4_encrypted_inode(inode)) + max_zeroout = 0; + /* If extent is less than s_max_zeroout_kb, zeroout directly */ if (max_zeroout && (ee_len <= max_zeroout)) { err = ext4_ext_zeroout(inode, ex); From b90197b655185a11640cce3a0a0bc5d8291b8ad2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 15 Oct 2015 10:29:05 -0400 Subject: [PATCH 09/26] ext4: use private version of page_zero_new_buffers() for data=journal mode If there is a error while copying data from userspace into the page cache during a write(2) system call, in data=journal mode, in ext4_journalled_write_end() were using page_zero_new_buffers() from fs/buffer.c. Unfortunately, this sets the buffer dirty flag, which is no good if journalling is enabled. This is a long-standing bug that goes back for years and years in ext3, but a combination of (a) data=journal not being very common, (b) in many case it only results in a warning message. and (c) only very rarely causes the kernel hang, means that we only really noticed this as a problem when commit 998ef75ddb caused this failure to happen frequently enough to cause generic/208 to fail when run in data=journal mode. The fix is to have our own version of this function that doesn't call mark_dirty_buffer(), since we will end up calling ext4_handle_dirty_metadata() on the buffer head(s) in questions very shortly afterwards in ext4_journalled_write_end(). Thanks to Dave Hansen and Linus Torvalds for helping to identify the root cause of the problem. Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/inode.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ae52e329e986..f205ac3c4e41 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1181,6 +1181,38 @@ errout: return ret ? ret : copied; } +/* + * This is a private version of page_zero_new_buffers() which doesn't + * set the buffer to be dirty, since in data=journalled mode we need + * to call ext4_handle_dirty_metadata() instead. + */ +static void zero_new_buffers(struct page *page, unsigned from, unsigned to) +{ + unsigned int block_start = 0, block_end; + struct buffer_head *head, *bh; + + bh = head = page_buffers(page); + do { + block_end = block_start + bh->b_size; + if (buffer_new(bh)) { + if (block_end > from && block_start < to) { + if (!PageUptodate(page)) { + unsigned start, size; + + start = max(from, block_start); + size = min(to, block_end) - start; + + zero_user(page, start, size); + set_buffer_uptodate(bh); + } + clear_buffer_new(bh); + } + } + block_start = block_end; + bh = bh->b_this_page; + } while (bh != head); +} + static int ext4_journalled_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, @@ -1207,7 +1239,7 @@ static int ext4_journalled_write_end(struct file *file, if (copied < len) { if (!PageUptodate(page)) copied = 0; - page_zero_new_buffers(page, from+copied, to); + zero_new_buffers(page, from+copied, to); } ret = ext4_walk_page_buffers(handle, page_buffers(page), from, From 8595798ca34d186d39abcb277591e541776c0ef5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 15 Oct 2015 10:30:36 -0400 Subject: [PATCH 10/26] jbd2: gate checksum calculations on crc driver presence, not sb flags Change the journal's checksum functions to gate on whether or not the crc32c driver is loaded, and gate the loading on the superblock bits. This prevents a journal crash if someone loads a journal in no-csum mode and then randomizes the superblock, thus flipping on the feature bits. Tested-By: Nikolay Borisov Reported-by: Nikolay Borisov Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 6 +++--- include/linux/jbd2.h | 13 +++++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8270fe9e3641..00f7dbdd64b0 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug); /* Checksumming functions */ static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) { - if (!jbd2_journal_has_csum_v2or3(j)) + if (!jbd2_journal_has_csum_v2or3_feature(j)) return 1; return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; @@ -1531,7 +1531,7 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (jbd2_journal_has_csum_v2or3(journal) && + if (jbd2_journal_has_csum_v2or3_feature(journal) && JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { /* Can't have checksum v1 and v2 on at the same time! */ printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " @@ -1545,7 +1545,7 @@ static int journal_get_superblock(journal_t *journal) } /* Load the checksum driver */ - if (jbd2_journal_has_csum_v2or3(journal)) { + if (jbd2_journal_has_csum_v2or3_feature(journal)) { journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(journal->j_chksum_driver)) { printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index df07e78487d5..6da6f89722e1 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1338,13 +1338,18 @@ static inline int tid_geq(tid_t x, tid_t y) extern int jbd2_journal_blocks_per_page(struct inode *inode); extern size_t journal_tag_bytes(journal_t *journal); +static inline bool jbd2_journal_has_csum_v2or3_feature(journal_t *j) +{ + return JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2) || + JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3); +} + static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) { - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) || - JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) - return 1; + WARN_ON_ONCE(jbd2_journal_has_csum_v2or3_feature(journal) && + journal->j_chksum_driver == NULL); - return 0; + return journal->j_chksum_driver != NULL; } /* From 9172796bc3754af73b90cbd70586812ddbc1e0ca Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 15 Oct 2015 10:33:21 -0400 Subject: [PATCH 11/26] ext4: promote ext4 over ext2 in the default probe order Prevent clean ext3 filesystems from mounting by default with the ext2 driver (with no journal!) by putting ext4 ahead of ext2 in the default probe order. This will have the effect of mounting ext2 filesystems with ext4.ko by default, which is a safer failure than hoping the user notices that their journalled ext3 is now running without a journal! Users who require ext2.ko for ext2 can either disable ext4.ko or explicitly request ext2 via "mount -t ext2" or "rootfstype=ext2". Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- fs/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/Makefile b/fs/Makefile index f79cf4043e60..79f522575cba 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -63,10 +63,11 @@ obj-$(CONFIG_DLM) += dlm/ # Do not add any filesystems before this line obj-$(CONFIG_FSCACHE) += fscache/ obj-$(CONFIG_REISERFS_FS) += reiserfs/ -obj-$(CONFIG_EXT2_FS) += ext2/ -# We place ext4 after ext2 so plain ext2 root fs's are mounted using ext2 -# unless explicitly requested by rootfstype obj-$(CONFIG_EXT4_FS) += ext4/ +# We place ext4 before ext2 so that clean ext3 root fs's do NOT mount using the +# ext2 driver, which doesn't know about journalling! Explicitly request ext2 +# by giving the rootfstype= parameter. +obj-$(CONFIG_EXT2_FS) += ext2/ obj-$(CONFIG_JBD2) += jbd2/ obj-$(CONFIG_CRAMFS) += cramfs/ obj-$(CONFIG_SQUASHFS) += squashfs/ From 8b4953e13f4c5d9a3c869f5fca7d51e1700e7db0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 17 Oct 2015 16:15:18 -0400 Subject: [PATCH 12/26] ext4: reserve code points for the project quota feature Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 8 +++++++- include/uapi/linux/fs.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3f248c9f899f..320f10ef4a23 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -374,6 +374,7 @@ struct flex_groups { #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ +#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ @@ -431,6 +432,7 @@ enum { EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */ + EXT4_INODE_PROJINHERIT = 29, /* Create with parents projid */ EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */ }; @@ -475,6 +477,7 @@ static inline void ext4_check_flag_values(void) CHECK_FLAG_VALUE(EA_INODE); CHECK_FLAG_VALUE(EOFBLOCKS); CHECK_FLAG_VALUE(INLINE_DATA); + CHECK_FLAG_VALUE(PROJINHERIT); CHECK_FLAG_VALUE(RESERVED); } @@ -692,6 +695,7 @@ struct ext4_inode { __le32 i_crtime; /* File Creation time */ __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ __le32 i_version_hi; /* high 32 bits for 64-bit version */ + __le32 i_projid; /* Project ID */ }; struct move_extent { @@ -1179,7 +1183,8 @@ struct ext4_super_block { __u8 s_encrypt_algos[4]; /* Encryption algorithms in use */ __u8 s_encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ __le32 s_lpf_ino; /* Location of the lost+found inode */ - __le32 s_reserved[100]; /* Padding to the end of the block */ + __le32 s_prj_quota_inum; /* inode for tracking project quota */ + __le32 s_reserved[99]; /* Padding to the end of the block */ __le32 s_checksum; /* crc32c(superblock) */ }; @@ -1566,6 +1571,7 @@ static inline int ext4_encrypted_inode(struct inode *inode) */ #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 #define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000 +#define EXT4_FEATURE_RO_COMPAT_PROJECT 0x2000 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 9b964a5920af..f15d980249b5 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -197,6 +197,7 @@ struct inodes_stat_t { #define FS_EXTENT_FL 0x00080000 /* Extents */ #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ From 8c81bd8f586c46eaf114758a78d82895a2b081c2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 17 Oct 2015 16:16:02 -0400 Subject: [PATCH 13/26] ext4: store checksum seed in superblock Allow the filesystem to store the metadata checksum seed in the superblock and add an incompat feature to say that we're using it. This enables tune2fs to change the UUID on a mounted metadata_csum FS without having to (racy!) rewrite all disk metadata. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 8 +++++--- fs/ext4/super.c | 4 +++- fs/ext4/sysfs.c | 2 ++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 320f10ef4a23..cd832b99f68c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1184,7 +1184,8 @@ struct ext4_super_block { __u8 s_encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ __le32 s_lpf_ino; /* Location of the lost+found inode */ __le32 s_prj_quota_inum; /* inode for tracking project quota */ - __le32 s_reserved[99]; /* Padding to the end of the block */ + __le32 s_checksum_seed; /* crc32c(uuid) if csum_seed set */ + __le32 s_reserved[98]; /* Padding to the end of the block */ __le32 s_checksum; /* crc32c(superblock) */ }; @@ -1584,7 +1585,7 @@ static inline int ext4_encrypted_inode(struct inode *inode) #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ -#define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */ +#define EXT4_FEATURE_INCOMPAT_CSUM_SEED 0x2000 #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ #define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */ #define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000 @@ -1613,7 +1614,8 @@ static inline int ext4_encrypted_inode(struct inode *inode) EXT4_FEATURE_INCOMPAT_FLEX_BG| \ EXT4_FEATURE_INCOMPAT_MMP | \ EXT4_FEATURE_INCOMPAT_INLINE_DATA | \ - EXT4_FEATURE_INCOMPAT_ENCRYPT) + EXT4_FEATURE_INCOMPAT_ENCRYPT | \ + EXT4_FEATURE_INCOMPAT_CSUM_SEED) #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7ef3fa58d3fe..43c0cc802189 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3196,7 +3196,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } /* Precompute checksum seed for all metadata */ - if (ext4_has_metadata_csum(sb)) + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_CSUM_SEED)) + sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); + else if (ext4_has_metadata_csum(sb)) sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, sizeof(es->s_uuid)); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 62bef0f06421..1b57c72f4a00 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -224,12 +224,14 @@ EXT4_ATTR_FEATURE(lazy_itable_init); EXT4_ATTR_FEATURE(batched_discard); EXT4_ATTR_FEATURE(meta_bg_resize); EXT4_ATTR_FEATURE(encryption); +EXT4_ATTR_FEATURE(metadata_csum_seed); static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(lazy_itable_init), ATTR_LIST(batched_discard), ATTR_LIST(meta_bg_resize), ATTR_LIST(encryption), + ATTR_LIST(metadata_csum_seed), NULL, }; From 6a797d2737838906f2ea0a31686e87c3151e21ca Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 17 Oct 2015 16:16:04 -0400 Subject: [PATCH 14/26] ext4: call out CRC and corruption errors with specific error codes Instead of overloading EIO for CRC errors and corrupt structures, return the same error codes that XFS returns for the same issues. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- fs/ext4/balloc.c | 2 +- fs/ext4/block_validity.c | 2 +- fs/ext4/dir.c | 4 +-- fs/ext4/ext4.h | 3 ++ fs/ext4/extents.c | 75 ++++++++++++++++++++-------------------- fs/ext4/ialloc.c | 1 + fs/ext4/indirect.c | 2 +- fs/ext4/inode.c | 16 ++++----- fs/ext4/mmp.c | 8 +++-- fs/ext4/namei.c | 28 +++++++-------- fs/ext4/super.c | 10 +++++- fs/ext4/symlink.c | 2 +- fs/ext4/xattr.c | 28 +++++++-------- fs/jbd2/journal.c | 3 +- fs/jbd2/recovery.c | 8 ++--- include/linux/jbd2.h | 3 ++ 16 files changed, 107 insertions(+), 88 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index cd6ea29be645..f831e10a7475 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -203,7 +203,7 @@ static int ext4_init_block_bitmap(struct super_block *sb, count); } set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); - return -EIO; + return -EFSBADCRC; } memset(bh->b_data, 0, sb->s_blocksize); diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 3522340c7a99..02ddec6d8a7d 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -234,7 +234,7 @@ int ext4_check_blockref(const char *function, unsigned int line, es->s_last_error_block = cpu_to_le64(blk); ext4_error_inode(inode, function, line, blk, "invalid block"); - return -EIO; + return -EFSCORRUPTED; } } return 0; diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index f9e14911918c..b29cb707539f 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -621,14 +621,14 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf, while ((char *) de < top) { if (ext4_check_dir_entry(dir, NULL, de, bh, buf, buf_size, offset)) - return -EIO; + return -EFSCORRUPTED; nlen = EXT4_DIR_REC_LEN(de->name_len); rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); de = (struct ext4_dir_entry_2 *)((char *)de + rlen); offset += rlen; } if ((char *) de > top) - return -EIO; + return -EFSCORRUPTED; return 0; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cd832b99f68c..6c0797346da1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3064,4 +3064,7 @@ extern void ext4_resize_end(struct super_block *sb); #endif /* __KERNEL__ */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ + #endif /* _EXT4_H */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7f486e350d15..faeaba8582d7 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -442,7 +442,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, int depth, ext4_fsblk_t pblk) { const char *error_msg; - int max = 0; + int max = 0, err = -EFSCORRUPTED; if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) { error_msg = "invalid magic"; @@ -473,6 +473,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, if (ext_depth(inode) != depth && !ext4_extent_block_csum_verify(inode, eh)) { error_msg = "extent tree corrupted"; + err = -EFSBADCRC; goto corrupted; } return 0; @@ -485,7 +486,7 @@ corrupted: le16_to_cpu(eh->eh_magic), le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), max, le16_to_cpu(eh->eh_depth), depth); - return -EIO; + return err; } #define ext4_ext_check(inode, eh, depth, pblk) \ @@ -910,7 +911,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, put_bh(bh); EXT4_ERROR_INODE(inode, "ppos %d > depth %d", ppos, depth); - ret = -EIO; + ret = -EFSCORRUPTED; goto err; } path[ppos].p_bh = bh; @@ -959,7 +960,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, EXT4_ERROR_INODE(inode, "logical %d == ei_block %d!", logical, le32_to_cpu(curp->p_idx->ei_block)); - return -EIO; + return -EFSCORRUPTED; } if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) @@ -968,7 +969,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, "eh_entries %d >= eh_max %d!", le16_to_cpu(curp->p_hdr->eh_entries), le16_to_cpu(curp->p_hdr->eh_max)); - return -EIO; + return -EFSCORRUPTED; } if (logical > le32_to_cpu(curp->p_idx->ei_block)) { @@ -992,7 +993,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) { EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!"); - return -EIO; + return -EFSCORRUPTED; } ix->ei_block = cpu_to_le32(logical); @@ -1001,7 +1002,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); - return -EIO; + return -EFSCORRUPTED; } err = ext4_ext_dirty(handle, inode, curp); @@ -1042,7 +1043,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, * border from split point */ if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) { EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!"); - return -EIO; + return -EFSCORRUPTED; } if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { border = path[depth].p_ext[1].ee_block; @@ -1086,7 +1087,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, newblock = ablocks[--a]; if (unlikely(newblock == 0)) { EXT4_ERROR_INODE(inode, "newblock == 0!"); - err = -EIO; + err = -EFSCORRUPTED; goto cleanup; } bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); @@ -1112,7 +1113,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!", path[depth].p_hdr->eh_entries, path[depth].p_hdr->eh_max); - err = -EIO; + err = -EFSCORRUPTED; goto cleanup; } /* start copy from next extent */ @@ -1151,7 +1152,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, k = depth - at - 1; if (unlikely(k < 0)) { EXT4_ERROR_INODE(inode, "k %d < 0!", k); - err = -EIO; + err = -EFSCORRUPTED; goto cleanup; } if (k) @@ -1191,7 +1192,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, EXT4_ERROR_INODE(inode, "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!", le32_to_cpu(path[i].p_ext->ee_block)); - err = -EIO; + err = -EFSCORRUPTED; goto cleanup; } /* start copy indexes */ @@ -1425,7 +1426,7 @@ static int ext4_ext_search_left(struct inode *inode, if (unlikely(path == NULL)) { EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); - return -EIO; + return -EFSCORRUPTED; } depth = path->p_depth; *phys = 0; @@ -1444,7 +1445,7 @@ static int ext4_ext_search_left(struct inode *inode, EXT4_ERROR_INODE(inode, "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!", *logical, le32_to_cpu(ex->ee_block)); - return -EIO; + return -EFSCORRUPTED; } while (--depth >= 0) { ix = path[depth].p_idx; @@ -1455,7 +1456,7 @@ static int ext4_ext_search_left(struct inode *inode, EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0, depth); - return -EIO; + return -EFSCORRUPTED; } } return 0; @@ -1465,7 +1466,7 @@ static int ext4_ext_search_left(struct inode *inode, EXT4_ERROR_INODE(inode, "logical %d < ee_block %d + ee_len %d!", *logical, le32_to_cpu(ex->ee_block), ee_len); - return -EIO; + return -EFSCORRUPTED; } *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; @@ -1495,7 +1496,7 @@ static int ext4_ext_search_right(struct inode *inode, if (unlikely(path == NULL)) { EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); - return -EIO; + return -EFSCORRUPTED; } depth = path->p_depth; *phys = 0; @@ -1514,7 +1515,7 @@ static int ext4_ext_search_right(struct inode *inode, EXT4_ERROR_INODE(inode, "first_extent(path[%d].p_hdr) != ex", depth); - return -EIO; + return -EFSCORRUPTED; } while (--depth >= 0) { ix = path[depth].p_idx; @@ -1522,7 +1523,7 @@ static int ext4_ext_search_right(struct inode *inode, EXT4_ERROR_INODE(inode, "ix != EXT_FIRST_INDEX *logical %d!", *logical); - return -EIO; + return -EFSCORRUPTED; } } goto found_extent; @@ -1532,7 +1533,7 @@ static int ext4_ext_search_right(struct inode *inode, EXT4_ERROR_INODE(inode, "logical %d < ee_block %d + ee_len %d!", *logical, le32_to_cpu(ex->ee_block), ee_len); - return -EIO; + return -EFSCORRUPTED; } if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { @@ -1670,7 +1671,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, if (unlikely(ex == NULL || eh == NULL)) { EXT4_ERROR_INODE(inode, "ex %p == NULL or eh %p == NULL", ex, eh); - return -EIO; + return -EFSCORRUPTED; } if (depth == 0) { @@ -1938,14 +1939,14 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, mb_flags |= EXT4_MB_DELALLOC_RESERVED; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); - return -EIO; + return -EFSCORRUPTED; } depth = ext_depth(inode); ex = path[depth].p_ext; eh = path[depth].p_hdr; if (unlikely(path[depth].p_hdr == NULL)) { EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - return -EIO; + return -EFSCORRUPTED; } /* try to insert block into found extent and return */ @@ -2172,7 +2173,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, if (unlikely(path[depth].p_hdr == NULL)) { up_read(&EXT4_I(inode)->i_data_sem); EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - err = -EIO; + err = -EFSCORRUPTED; break; } ex = path[depth].p_ext; @@ -2241,7 +2242,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, if (unlikely(es.es_len == 0)) { EXT4_ERROR_INODE(inode, "es.es_len == 0"); - err = -EIO; + err = -EFSCORRUPTED; break; } @@ -2264,7 +2265,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, "next extent == %u, next " "delalloc extent = %u", next, next_del); - err = -EIO; + err = -EFSCORRUPTED; break; } } @@ -2363,7 +2364,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, leaf = ext4_idx_pblock(path->p_idx); if (unlikely(path->p_hdr->eh_entries == 0)) { EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); - return -EIO; + return -EFSCORRUPTED; } err = ext4_ext_get_access(handle, inode, path); if (err) @@ -2612,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, eh = path[depth].p_hdr; if (unlikely(path[depth].p_hdr == NULL)) { EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - return -EIO; + return -EFSCORRUPTED; } /* find where to start removing */ ex = path[depth].p_ext; @@ -2666,7 +2667,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, "on extent %u:%u", start, end, ex_ee_block, ex_ee_block + ex_ee_len - 1); - err = -EIO; + err = -EFSCORRUPTED; goto out; } else if (a != ex_ee_block) { /* remove tail of the extent */ @@ -2841,7 +2842,7 @@ again: EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - err = -EIO; + err = -EFSCORRUPTED; } goto out; } @@ -2920,7 +2921,7 @@ again: i = 0; if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) { - err = -EIO; + err = -EFSCORRUPTED; goto out; } } @@ -2978,7 +2979,7 @@ again: * Should be a no-op if we did IO above. */ cond_resched(); if (WARN_ON(i + 1 > depth)) { - err = -EIO; + err = -EFSCORRUPTED; break; } path[i + 1].p_bh = bh; @@ -3345,7 +3346,7 @@ static int ext4_split_extent(handle_t *handle, if (!ex) { EXT4_ERROR_INODE(inode, "unexpected hole at %lu", (unsigned long) map->m_lblk); - return -EIO; + return -EFSCORRUPTED; } unwritten = ext4_ext_is_unwritten(ex); split_flag1 = 0; @@ -3973,7 +3974,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, if (!ex) { EXT4_ERROR_INODE(inode, "unexpected hole at %lu", (unsigned long) map->m_lblk); - return -EIO; + return -EFSCORRUPTED; } } @@ -4311,7 +4312,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, "lblock: %lu, depth: %d pblock %lld", (unsigned long) map->m_lblk, depth, path[depth].p_block); - err = -EIO; + err = -EFSCORRUPTED; goto out2; } @@ -5274,7 +5275,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, if (depth == path->p_depth) { ex_start = path[depth].p_ext; if (!ex_start) - return -EIO; + return -EFSCORRUPTED; ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); @@ -5414,7 +5415,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, if (!extent) { EXT4_ERROR_INODE(inode, "unexpected hole at %lu", (unsigned long) *iterator); - return -EIO; + return -EFSCORRUPTED; } if (SHIFT == SHIFT_LEFT && *iterator > le32_to_cpu(extent->ee_block)) { diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 619bfc1fda8c..f34b1aa8bbb5 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1116,6 +1116,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino); + err = -EFSCORRUPTED; goto error; } diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 2468261748b2..d96ea53bbece 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -566,7 +566,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { EXT4_ERROR_INODE(inode, "Can't allocate blocks for " "non-extent mapped inodes with bigalloc"); - return -EUCLEAN; + return -EFSCORRUPTED; } /* Set up for the direct block allocation */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f205ac3c4e41..2b9278867caf 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -378,7 +378,7 @@ static int __check_block_validity(struct inode *inode, const char *func, "lblock %lu mapped to illegal pblock " "(length %d)", (unsigned long) map->m_lblk, map->m_len); - return -EIO; + return -EFSCORRUPTED; } return 0; } @@ -480,7 +480,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, /* We can handle the block number less than EXT_MAX_BLOCKS */ if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS)) - return -EIO; + return -EFSCORRUPTED; /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { @@ -3863,7 +3863,7 @@ static int __ext4_get_inode_loc(struct inode *inode, iloc->bh = NULL; if (!ext4_valid_inum(sb, inode->i_ino)) - return -EIO; + return -EFSCORRUPTED; iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); @@ -4111,7 +4111,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)", EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize, EXT4_INODE_SIZE(inode->i_sb)); - ret = -EIO; + ret = -EFSCORRUPTED; goto bad_inode; } } else @@ -4131,7 +4131,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (!ext4_inode_csum_verify(inode, raw_inode, ei)) { EXT4_ERROR_INODE(inode, "checksum invalid"); - ret = -EIO; + ret = -EFSBADCRC; goto bad_inode; } @@ -4246,7 +4246,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { EXT4_ERROR_INODE(inode, "bad extended attribute block %llu", ei->i_file_acl); - ret = -EIO; + ret = -EFSCORRUPTED; goto bad_inode; } else if (!ext4_has_inline_data(inode)) { if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { @@ -4297,7 +4297,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) } else if (ino == EXT4_BOOT_LOADER_INO) { make_bad_inode(inode); } else { - ret = -EIO; + ret = -EFSCORRUPTED; EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode); goto bad_inode; } @@ -4315,7 +4315,7 @@ bad_inode: struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino) { if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); return ext4_iget(sb, ino); } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 6eb1a619890c..0a512aa81bf7 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -98,10 +98,12 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, } mmp = (struct mmp_struct *)((*bh)->b_data); - if (le32_to_cpu(mmp->mmp_magic) == EXT4_MMP_MAGIC && - ext4_mmp_csum_verify(sb, mmp)) + if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) + ret = -EFSCORRUPTED; + else if (!ext4_mmp_csum_verify(sb, mmp)) + ret = -EFSBADCRC; + else return 0; - ret = -EINVAL; warn_exit: ext4_warning(sb, "Error %d while reading MMP block %llu", diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 9f61e7679a6d..8fd8e0d4c75e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -109,7 +109,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, if (!bh) { ext4_error_inode(inode, func, line, block, "Directory hole found"); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } dirent = (struct ext4_dir_entry *) bh->b_data; /* Determine whether or not we have an index block */ @@ -124,7 +124,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, if (!is_dx_block && type == INDEX) { ext4_error_inode(inode, func, line, block, "directory leaf block found instead of index block"); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } if (!ext4_has_metadata_csum(inode->i_sb) || buffer_verified(bh)) @@ -142,7 +142,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, ext4_error_inode(inode, func, line, block, "Directory index failed checksum"); brelse(bh); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSBADCRC); } } if (!is_dx_block) { @@ -152,7 +152,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, ext4_error_inode(inode, func, line, block, "Directory block failed checksum"); brelse(bh); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSBADCRC); } } return bh; @@ -1570,19 +1570,19 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi brelse(bh); if (!ext4_valid_inum(dir->i_sb, ino)) { EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } if (unlikely(ino == dir->i_ino)) { EXT4_ERROR_INODE(dir, "'%pd' linked to parent dir", dentry); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } inode = ext4_iget_normal(dir->i_sb, ino); if (inode == ERR_PTR(-ESTALE)) { EXT4_ERROR_INODE(dir, "deleted inode referenced: %u", ino); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } if (!IS_ERR(inode) && ext4_encrypted_inode(dir) && (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || @@ -1619,7 +1619,7 @@ struct dentry *ext4_get_parent(struct dentry *child) if (!ext4_valid_inum(d_inode(child)->i_sb, ino)) { EXT4_ERROR_INODE(d_inode(child), "bad parent inode number: %u", ino); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } return d_obtain_alias(ext4_iget_normal(d_inode(child)->i_sb, ino)); @@ -1807,7 +1807,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, while ((char *) de <= top) { if (ext4_check_dir_entry(dir, NULL, de, bh, buf, buf_size, offset)) { - res = -EIO; + res = -EFSCORRUPTED; goto return_result; } /* Provide crypto context and crypto buffer to ext4 match */ @@ -1967,7 +1967,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, if ((char *) de >= (((char *) root) + blocksize)) { EXT4_ERROR_INODE(dir, "invalid rec_len for '..'"); brelse(bh); - return -EIO; + return -EFSCORRUPTED; } len = ((char *) root) + (blocksize - csum_size) - (char *) de; @@ -2315,7 +2315,7 @@ int ext4_generic_delete_entry(handle_t *handle, while (i < buf_size - csum_size) { if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data, bh->b_size, i)) - return -EIO; + return -EFSCORRUPTED; if (de == de_del) { if (pde) pde->rec_len = ext4_rec_len_to_disk( @@ -2934,7 +2934,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) inode = d_inode(dentry); - retval = -EIO; + retval = -EFSCORRUPTED; if (le32_to_cpu(de->inode) != inode->i_ino) goto end_rmdir; @@ -3008,7 +3008,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) inode = d_inode(dentry); - retval = -EIO; + retval = -EFSCORRUPTED; if (le32_to_cpu(de->inode) != inode->i_ino) goto end_unlink; @@ -3310,7 +3310,7 @@ static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent) if (!ent->dir_bh) return retval; if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino) - return -EIO; + return -EFSCORRUPTED; BUFFER_TRACE(ent->dir_bh, "get_write_access"); return ext4_journal_get_write_access(handle, ent->dir_bh); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 43c0cc802189..f7519ad25cce 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -490,6 +490,12 @@ const char *ext4_decode_error(struct super_block *sb, int errno, char *errstr = NULL; switch (errno) { + case -EFSCORRUPTED: + errstr = "Corrupt filesystem"; + break; + case -EFSBADCRC: + errstr = "Filesystem failed CRC"; + break; case -EIO: errstr = "IO failure"; break; @@ -3192,6 +3198,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " "invalid superblock checksum. Run e2fsck?"); silent = 1; + ret = -EFSBADCRC; goto cantfind_ext4; } @@ -3612,6 +3619,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (!ext4_check_descriptors(sb, &first_not_zeroed)) { ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); + ret = -EFSCORRUPTED; goto failed_mount2; } @@ -4664,7 +4672,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) "ext4_remount: Checksum for group %u failed (%u!=%u)", g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), le16_to_cpu(gdp->bg_checksum)); - err = -EINVAL; + err = -EFSBADCRC; goto restore_opts; } } diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index c677f2c1044b..abe2401ce405 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -57,7 +57,7 @@ static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cook sizeof(struct ext4_encrypted_symlink_data) - 1) > max_size) { /* Symlink data on the disk is corrupted */ - res = -EIO; + res = -EFSCORRUPTED; goto errout; } plen = (cstr.len < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2) ? diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 16e28c08d1e8..7649422ed7a1 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -195,7 +195,7 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, while (!IS_LAST_ENTRY(e)) { struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); if ((void *)next >= end) - return -EIO; + return -EFSCORRUPTED; e = next; } @@ -205,7 +205,7 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, (void *)e + sizeof(__u32) || value_start + le16_to_cpu(entry->e_value_offs) + le32_to_cpu(entry->e_value_size) > end)) - return -EIO; + return -EFSCORRUPTED; entry = EXT4_XATTR_NEXT(entry); } @@ -222,9 +222,9 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) - return -EIO; + return -EFSCORRUPTED; if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) - return -EIO; + return -EFSBADCRC; error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size, bh->b_data); if (!error) @@ -239,7 +239,7 @@ ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size) if (entry->e_value_block != 0 || value_size > size || le16_to_cpu(entry->e_value_offs) + value_size > size) - return -EIO; + return -EFSCORRUPTED; return 0; } @@ -266,7 +266,7 @@ ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index, } *pentry = entry; if (!cmp && ext4_xattr_check_entry(entry, size)) - return -EIO; + return -EFSCORRUPTED; return cmp ? -ENODATA : 0; } @@ -297,13 +297,13 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, bad_block: EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); - error = -EIO; + error = -EFSCORRUPTED; goto cleanup; } ext4_xattr_cache_insert(ext4_mb_cache, bh); entry = BFIRST(bh); error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); - if (error == -EIO) + if (error == -EFSCORRUPTED) goto bad_block; if (error) goto cleanup; @@ -445,7 +445,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (ext4_xattr_check_block(inode, bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); - error = -EIO; + error = -EFSCORRUPTED; goto cleanup; } ext4_xattr_cache_insert(ext4_mb_cache, bh); @@ -751,7 +751,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, if (ext4_xattr_check_block(inode, bs->bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); - error = -EIO; + error = -EFSCORRUPTED; goto cleanup; } /* Find the named attribute. */ @@ -811,7 +811,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, bs->bh); } unlock_buffer(bs->bh); - if (error == -EIO) + if (error == -EFSCORRUPTED) goto bad_block; if (!error) error = ext4_handle_dirty_xattr_block(handle, @@ -855,7 +855,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, } error = ext4_xattr_set_entry(i, s); - if (error == -EIO) + if (error == -EFSCORRUPTED) goto bad_block; if (error) goto cleanup; @@ -1314,7 +1314,7 @@ retry: if (ext4_xattr_check_block(inode, bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); - error = -EIO; + error = -EFSCORRUPTED; goto cleanup; } base = BHDR(bh); @@ -1579,7 +1579,7 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1, memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) return 1; if (entry1->e_value_block != 0 || entry2->e_value_block != 0) - return -EIO; + return -EFSCORRUPTED; if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), (char *)header2 + le16_to_cpu(entry2->e_value_offs), le32_to_cpu(entry1->e_value_size))) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 00f7dbdd64b0..474c1781c0ce 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1558,6 +1558,7 @@ static int journal_get_superblock(journal_t *journal) /* Check superblock checksum */ if (!jbd2_superblock_csum_verify(journal, sb)) { printk(KERN_ERR "JBD2: journal checksum error\n"); + err = -EFSBADCRC; goto out; } @@ -1649,7 +1650,7 @@ int jbd2_journal_load(journal_t *journal) printk(KERN_ERR "JBD2: journal transaction %u on %s " "is corrupt.\n", journal->j_failed_commit, journal->j_devname); - return -EIO; + return -EFSCORRUPTED; } /* OK, we've finished with the dynamic journal bits: diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index a9079d035ae5..5c836d78af3b 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -140,7 +140,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, if (offset >= journal->j_maxlen) { printk(KERN_ERR "JBD2: corrupted journal superblock\n"); - return -EIO; + return -EFSCORRUPTED; } err = jbd2_journal_bmap(journal, offset, &blocknr); @@ -527,7 +527,7 @@ static int do_one_pass(journal_t *journal, printk(KERN_ERR "JBD2: Invalid checksum " "recovering block %lu in log\n", next_log_block); - err = -EIO; + err = -EFSBADCRC; brelse(bh); goto failed; } @@ -602,7 +602,7 @@ static int do_one_pass(journal_t *journal, journal, tag, obh->b_data, be32_to_cpu(tmp->h_sequence))) { brelse(obh); - success = -EIO; + success = -EFSBADCRC; printk(KERN_ERR "JBD2: Invalid " "checksum recovering " "block %llu in log\n", @@ -851,7 +851,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, rcount = be32_to_cpu(header->r_count); if (!jbd2_revoke_block_csum_verify(journal, header)) - return -EINVAL; + return -EFSBADCRC; if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_revoke_tail); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 6da6f89722e1..f2a4b07a6ce6 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1449,4 +1449,7 @@ static inline tid_t jbd2_get_latest_transaction(journal_t *journal) #endif /* __KERNEL__ */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ + #endif /* _LINUX_JBD2_H */ From e2b911c53584a92266943f3b7f2cdbc19c1a4e80 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 17 Oct 2015 16:18:43 -0400 Subject: [PATCH 15/26] ext4: clean up feature test macros with predicate functions Create separate predicate functions to test/set/clear feature flags, thereby replacing the wordy old macros. Furthermore, clean out the places where we open-coded feature tests. Signed-off-by: Darrick J. Wong --- fs/ext4/balloc.c | 16 ++--- fs/ext4/dir.c | 3 +- fs/ext4/ext4.h | 146 +++++++++++++++++++++++++++++++++++---- fs/ext4/ext4_jbd2.h | 10 ++- fs/ext4/extents.c | 4 +- fs/ext4/ialloc.c | 4 +- fs/ext4/indirect.c | 3 +- fs/ext4/inline.c | 3 +- fs/ext4/inode.c | 16 ++--- fs/ext4/ioctl.c | 15 ++-- fs/ext4/migrate.c | 9 +-- fs/ext4/namei.c | 8 +-- fs/ext4/resize.c | 30 ++++---- fs/ext4/super.c | 165 +++++++++++++++++++------------------------- fs/ext4/xattr.c | 4 +- 15 files changed, 252 insertions(+), 184 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index f831e10a7475..32184c16af16 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -213,7 +213,7 @@ static int ext4_init_block_bitmap(struct super_block *sb, start = ext4_group_first_block_no(sb, block_group); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) + if (ext4_has_feature_flex_bg(sb)) flex_bg = 1; /* Set bits for block and inode bitmaps, and inode table */ @@ -322,7 +322,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, ext4_fsblk_t blk; ext4_fsblk_t group_first_block; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { + if (ext4_has_feature_flex_bg(sb)) { /* with FLEX_BG, the inode/block bitmaps and itable * blocks may not be in the group at all * so the bitmap validation will be skipped for those groups @@ -740,14 +740,13 @@ int ext4_bg_has_super(struct super_block *sb, ext4_group_t group) if (group == 0) return 1; - if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_SPARSE_SUPER2)) { + if (ext4_has_feature_sparse_super2(sb)) { if (group == le32_to_cpu(es->s_backup_bgs[0]) || group == le32_to_cpu(es->s_backup_bgs[1])) return 1; return 0; } - if ((group <= 1) || !EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) + if ((group <= 1) || !ext4_has_feature_sparse_super(sb)) return 1; if (!(group & 1)) return 0; @@ -776,7 +775,7 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, if (!ext4_bg_has_super(sb, group)) return 0; - if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG)) + if (ext4_has_feature_meta_bg(sb)) return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); else return EXT4_SB(sb)->s_gdb_count; @@ -797,8 +796,7 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); - if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || - metagroup < first_meta_bg) + if (!ext4_has_feature_meta_bg(sb) || metagroup < first_meta_bg) return ext4_bg_num_gdb_nometa(sb, group); return ext4_bg_num_gdb_meta(sb,group); @@ -818,7 +816,7 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb, /* Check for superblock and gdt backups in this group */ num = ext4_bg_has_super(sb, block_group); - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || + if (!ext4_has_feature_meta_bg(sb) || block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * sbi->s_desc_per_block) { if (num) { diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index b29cb707539f..1d1bca74f844 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -40,8 +40,7 @@ static int is_dx_dir(struct inode *inode) { struct super_block *sb = inode->i_sb; - if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_COMPAT_DIR_INDEX) && + if (ext4_has_feature_dir_index(inode->i_sb) && ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || ((inode->i_size >> sb->s_blocksize_bits) == 1) || ext4_has_inline_data(inode))) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6c0797346da1..b1d04aa724be 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1528,6 +1528,7 @@ static inline int ext4_encrypted_inode(struct inode *inode) * Feature set definitions */ +/* Use the ext4_{has,set,clear}_feature_* helpers; these will be removed */ #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0) #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ @@ -1590,6 +1591,94 @@ static inline int ext4_encrypted_inode(struct inode *inode) #define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */ #define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000 +#define EXT4_FEATURE_COMPAT_FUNCS(name, flagname) \ +static inline bool ext4_has_feature_##name(struct super_block *sb) \ +{ \ + return ((EXT4_SB(sb)->s_es->s_feature_compat & \ + cpu_to_le32(EXT4_FEATURE_COMPAT_##flagname)) != 0); \ +} \ +static inline void ext4_set_feature_##name(struct super_block *sb) \ +{ \ + EXT4_SB(sb)->s_es->s_feature_compat |= \ + cpu_to_le32(EXT4_FEATURE_COMPAT_##flagname); \ +} \ +static inline void ext4_clear_feature_##name(struct super_block *sb) \ +{ \ + EXT4_SB(sb)->s_es->s_feature_compat &= \ + ~cpu_to_le32(EXT4_FEATURE_COMPAT_##flagname); \ +} + +#define EXT4_FEATURE_RO_COMPAT_FUNCS(name, flagname) \ +static inline bool ext4_has_feature_##name(struct super_block *sb) \ +{ \ + return ((EXT4_SB(sb)->s_es->s_feature_ro_compat & \ + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_##flagname)) != 0); \ +} \ +static inline void ext4_set_feature_##name(struct super_block *sb) \ +{ \ + EXT4_SB(sb)->s_es->s_feature_ro_compat |= \ + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_##flagname); \ +} \ +static inline void ext4_clear_feature_##name(struct super_block *sb) \ +{ \ + EXT4_SB(sb)->s_es->s_feature_ro_compat &= \ + ~cpu_to_le32(EXT4_FEATURE_RO_COMPAT_##flagname); \ +} + +#define EXT4_FEATURE_INCOMPAT_FUNCS(name, flagname) \ +static inline bool ext4_has_feature_##name(struct super_block *sb) \ +{ \ + return ((EXT4_SB(sb)->s_es->s_feature_incompat & \ + cpu_to_le32(EXT4_FEATURE_INCOMPAT_##flagname)) != 0); \ +} \ +static inline void ext4_set_feature_##name(struct super_block *sb) \ +{ \ + EXT4_SB(sb)->s_es->s_feature_incompat |= \ + cpu_to_le32(EXT4_FEATURE_INCOMPAT_##flagname); \ +} \ +static inline void ext4_clear_feature_##name(struct super_block *sb) \ +{ \ + EXT4_SB(sb)->s_es->s_feature_incompat &= \ + ~cpu_to_le32(EXT4_FEATURE_INCOMPAT_##flagname); \ +} + +EXT4_FEATURE_COMPAT_FUNCS(dir_prealloc, DIR_PREALLOC) +EXT4_FEATURE_COMPAT_FUNCS(imagic_inodes, IMAGIC_INODES) +EXT4_FEATURE_COMPAT_FUNCS(journal, HAS_JOURNAL) +EXT4_FEATURE_COMPAT_FUNCS(xattr, EXT_ATTR) +EXT4_FEATURE_COMPAT_FUNCS(resize_inode, RESIZE_INODE) +EXT4_FEATURE_COMPAT_FUNCS(dir_index, DIR_INDEX) +EXT4_FEATURE_COMPAT_FUNCS(sparse_super2, SPARSE_SUPER2) + +EXT4_FEATURE_RO_COMPAT_FUNCS(sparse_super, SPARSE_SUPER) +EXT4_FEATURE_RO_COMPAT_FUNCS(large_file, LARGE_FILE) +EXT4_FEATURE_RO_COMPAT_FUNCS(btree_dir, BTREE_DIR) +EXT4_FEATURE_RO_COMPAT_FUNCS(huge_file, HUGE_FILE) +EXT4_FEATURE_RO_COMPAT_FUNCS(gdt_csum, GDT_CSUM) +EXT4_FEATURE_RO_COMPAT_FUNCS(dir_nlink, DIR_NLINK) +EXT4_FEATURE_RO_COMPAT_FUNCS(extra_isize, EXTRA_ISIZE) +EXT4_FEATURE_RO_COMPAT_FUNCS(quota, QUOTA) +EXT4_FEATURE_RO_COMPAT_FUNCS(bigalloc, BIGALLOC) +EXT4_FEATURE_RO_COMPAT_FUNCS(metadata_csum, METADATA_CSUM) +EXT4_FEATURE_RO_COMPAT_FUNCS(readonly, READONLY) +EXT4_FEATURE_RO_COMPAT_FUNCS(project, PROJECT) + +EXT4_FEATURE_INCOMPAT_FUNCS(compression, COMPRESSION) +EXT4_FEATURE_INCOMPAT_FUNCS(filetype, FILETYPE) +EXT4_FEATURE_INCOMPAT_FUNCS(journal_needs_recovery, RECOVER) +EXT4_FEATURE_INCOMPAT_FUNCS(journal_dev, JOURNAL_DEV) +EXT4_FEATURE_INCOMPAT_FUNCS(meta_bg, META_BG) +EXT4_FEATURE_INCOMPAT_FUNCS(extents, EXTENTS) +EXT4_FEATURE_INCOMPAT_FUNCS(64bit, 64BIT) +EXT4_FEATURE_INCOMPAT_FUNCS(mmp, MMP) +EXT4_FEATURE_INCOMPAT_FUNCS(flex_bg, FLEX_BG) +EXT4_FEATURE_INCOMPAT_FUNCS(ea_inode, EA_INODE) +EXT4_FEATURE_INCOMPAT_FUNCS(dirdata, DIRDATA) +EXT4_FEATURE_INCOMPAT_FUNCS(csum_seed, CSUM_SEED) +EXT4_FEATURE_INCOMPAT_FUNCS(largedir, LARGEDIR) +EXT4_FEATURE_INCOMPAT_FUNCS(inline_data, INLINE_DATA) +EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT) + #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ EXT4_FEATURE_INCOMPAT_META_BG) @@ -1605,7 +1694,7 @@ static inline int ext4_encrypted_inode(struct inode *inode) EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR) -#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR +#define EXT4_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ EXT4_FEATURE_INCOMPAT_RECOVER| \ EXT4_FEATURE_INCOMPAT_META_BG| \ @@ -1627,6 +1716,40 @@ static inline int ext4_encrypted_inode(struct inode *inode) EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ EXT4_FEATURE_RO_COMPAT_QUOTA) +#define EXTN_FEATURE_FUNCS(ver) \ +static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \ +{ \ + return ((EXT4_SB(sb)->s_es->s_feature_compat & \ + cpu_to_le32(~EXT##ver##_FEATURE_COMPAT_SUPP)) != 0); \ +} \ +static inline bool ext4_has_unknown_ext##ver##_ro_compat_features(struct super_block *sb) \ +{ \ + return ((EXT4_SB(sb)->s_es->s_feature_ro_compat & \ + cpu_to_le32(~EXT##ver##_FEATURE_RO_COMPAT_SUPP)) != 0); \ +} \ +static inline bool ext4_has_unknown_ext##ver##_incompat_features(struct super_block *sb) \ +{ \ + return ((EXT4_SB(sb)->s_es->s_feature_incompat & \ + cpu_to_le32(~EXT##ver##_FEATURE_INCOMPAT_SUPP)) != 0); \ +} + +EXTN_FEATURE_FUNCS(2) +EXTN_FEATURE_FUNCS(3) +EXTN_FEATURE_FUNCS(4) + +static inline bool ext4_has_compat_features(struct super_block *sb) +{ + return (EXT4_SB(sb)->s_es->s_feature_compat != 0); +} +static inline bool ext4_has_ro_compat_features(struct super_block *sb) +{ + return (EXT4_SB(sb)->s_es->s_feature_ro_compat != 0); +} +static inline bool ext4_has_incompat_features(struct super_block *sb) +{ + return (EXT4_SB(sb)->s_es->s_feature_incompat != 0); +} + /* * Default values for user and/or group using reserved blocks */ @@ -1777,8 +1900,7 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) * (c) Daniel Phillips, 2001 */ -#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ - EXT4_FEATURE_COMPAT_DIR_INDEX) && \ +#define is_dx(dir) (ext4_has_feature_dir_index((dir)->i_sb) && \ ext4_test_inode_flag((dir), EXT4_INODE_INDEX)) #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) @@ -2079,7 +2201,7 @@ int ext4_init_crypto(void); void ext4_exit_crypto(void); static inline int ext4_sb_has_crypto(struct super_block *sb) { - return EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT); + return ext4_has_feature_encrypt(sb); } #else static inline int ext4_init_crypto(void) { return 0; } @@ -2200,8 +2322,7 @@ int ext4_insert_dentry(struct inode *dir, struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { - if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_COMPAT_DIR_INDEX)) + if (!ext4_has_feature_dir_index(inode->i_sb)) ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); } static unsigned char ext4_filetype_table[] = { @@ -2210,8 +2331,7 @@ static unsigned char ext4_filetype_table[] = { static inline unsigned char get_dtype(struct super_block *sb, int filetype) { - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) || - (filetype >= EXT4_FT_MAX)) + if (!ext4_has_feature_filetype(sb) || filetype >= EXT4_FT_MAX) return DT_UNKNOWN; return ext4_filetype_table[filetype]; @@ -2543,15 +2663,13 @@ extern int ext4_register_li_request(struct super_block *sb, static inline int ext4_has_group_desc_csum(struct super_block *sb) { - return EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_GDT_CSUM) || - (EXT4_SB(sb)->s_chksum_driver != NULL); + return ext4_has_feature_gdt_csum(sb) || + EXT4_SB(sb)->s_chksum_driver != NULL; } static inline int ext4_has_metadata_csum(struct super_block *sb) { - WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && + WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) && !EXT4_SB(sb)->s_chksum_driver); return (EXT4_SB(sb)->s_chksum_driver != NULL); @@ -2898,7 +3016,7 @@ static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { static inline void ext4_set_de_type(struct super_block *sb, struct ext4_dir_entry_2 *de, umode_t mode) { - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE)) + if (ext4_has_feature_filetype(sb)) de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; } diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 9c5b49fb281e..5f5846211095 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -34,8 +34,7 @@ */ #define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ - (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ - ? 20U : 8U) + (ext4_has_feature_extents(sb) ? 20U : 8U) /* Extended attribute operations touch at most two data buffers, * two bitmap buffers, and two group summaries, in addition to the inode @@ -84,17 +83,16 @@ /* Amount of blocks needed for quota update - we know that the structure was * allocated so we need to update only data block */ #define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ - 1 : 0) + ext4_has_feature_quota(sb)) ? 1 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ #define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ + ext4_has_feature_quota(sb)) ?\ (DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_INIT_REWRITE) : 0) #define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ + ext4_has_feature_quota(sb)) ?\ (DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_DEL_REWRITE) : 0) #else diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index faeaba8582d7..3a6197a2e270 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3055,7 +3055,7 @@ void ext4_ext_init(struct super_block *sb) * possible initialization would be here */ - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (ext4_has_feature_extents(sb)) { #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS) printk(KERN_INFO "EXT4-fs: file extents enabled" #ifdef AGGRESSIVE_TEST @@ -3082,7 +3082,7 @@ void ext4_ext_init(struct super_block *sb) */ void ext4_ext_release(struct super_block *sb) { - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) + if (!ext4_has_feature_extents(sb)) return; #ifdef EXTENTS_STATS diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f34b1aa8bbb5..0d7f06ed7ee2 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1045,7 +1045,7 @@ got: ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; ei->i_inline_off = 0; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA)) + if (ext4_has_feature_inline_data(sb)) ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); ret = inode; err = dquot_alloc_inode(inode); @@ -1060,7 +1060,7 @@ got: if (err) goto fail_free_drop; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (ext4_has_feature_extents(sb)) { /* set extent flag only for directory, file and normal symlink*/ if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index d96ea53bbece..355ef9c36c87 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -562,8 +562,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, /* * Okay, we need to do block allocation. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { + if (ext4_has_feature_bigalloc(inode->i_sb)) { EXT4_ERROR_INODE(inode, "Can't allocate blocks for " "non-extent mapped inodes with bigalloc"); return -EFSCORRUPTED; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index cd944a7a99cd..d884989cc83d 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -434,8 +434,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle, memset((void *)ext4_raw_inode(&is.iloc)->i_block, 0, EXT4_MIN_INLINE_DATA_SIZE); - if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (ext4_has_feature_extents(inode->i_sb)) { if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) { ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2b9278867caf..e8d620a484f6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2642,8 +2642,7 @@ static int ext4_nonda_switch(struct super_block *sb) /* We always reserve for an inode update; the superblock could be there too */ static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len) { - if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_LARGE_FILE))) + if (likely(ext4_has_feature_large_file(inode->i_sb))) return 1; if (pos + len <= 0x7fffffffULL) @@ -4049,8 +4048,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, struct inode *inode = &(ei->vfs_inode); struct super_block *sb = inode->i_sb; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { + if (ext4_has_feature_huge_file(sb)) { /* we are using combined 48 bit field */ i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | le32_to_cpu(raw_inode->i_blocks_lo); @@ -4173,7 +4171,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_flags = le32_to_cpu(raw_inode->i_flags); inode->i_blocks = ext4_inode_blocks(raw_inode, ei); ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) + if (ext4_has_feature_64bit(sb)) ei->i_file_acl |= ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; inode->i_size = ext4_isize(raw_inode); @@ -4337,7 +4335,7 @@ static int ext4_inode_blocks_set(handle_t *handle, ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); return 0; } - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) + if (!ext4_has_feature_huge_file(sb)) return -EFBIG; if (i_blocks <= 0xffffffffffffULL) { @@ -4498,8 +4496,7 @@ static int ext4_do_update_inode(handle_t *handle, need_datasync = 1; } if (ei->i_disksize > 0x7fffffffULL) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || + if (!ext4_has_feature_large_file(sb) || EXT4_SB(sb)->s_es->s_rev_level == cpu_to_le32(EXT4_GOOD_OLD_REV)) set_large_file = 1; @@ -4548,8 +4545,7 @@ static int ext4_do_update_inode(handle_t *handle, if (err) goto out_brelse; ext4_update_dynamic_rev(sb); - EXT4_SET_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_LARGE_FILE); + ext4_set_feature_large_file(sb); ext4_handle_sync(handle); err = ext4_handle_dirty_super(handle, sb); } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 1346cfa355d0..5e872fd40e5e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -145,8 +145,7 @@ static long swap_inode_boot_loader(struct super_block *sb, inode_bl->i_version = 1; i_size_write(inode_bl, 0); inode_bl->i_mode = S_IFREG; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (ext4_has_feature_extents(sb)) { ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS); ext4_ext_tree_init(handle, inode_bl); } else @@ -383,8 +382,7 @@ setversion_out: goto group_extend_out; } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { + if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc"); err = -EOPNOTSUPP; @@ -432,8 +430,7 @@ group_extend_out: goto mext_out; } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { + if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online defrag not supported with bigalloc"); err = -EOPNOTSUPP; @@ -470,8 +467,7 @@ mext_out: goto group_add_out; } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { + if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc"); err = -EOPNOTSUPP; @@ -553,8 +549,7 @@ group_add_out: int err = 0, err2 = 0; ext4_group_t o_group = EXT4_SB(sb)->s_groups_count; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { + if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online resizing not (yet) supported with bigalloc"); return -EOPNOTSUPP; diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 6163ad21cb0e..a4651894cc33 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -448,8 +448,7 @@ int ext4_ext_migrate(struct inode *inode) * If the filesystem does not support extents, or the inode * already is extent-based, error out. */ - if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_INCOMPAT_EXTENTS) || + if (!ext4_has_feature_extents(inode->i_sb) || (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EINVAL; @@ -625,13 +624,11 @@ int ext4_ind_migrate(struct inode *inode) handle_t *handle; int ret; - if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_INCOMPAT_EXTENTS) || + if (!ext4_has_feature_extents(inode->i_sb) || (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EINVAL; - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) + if (ext4_has_feature_bigalloc(inode->i_sb)) return -EOPNOTSUPP; /* diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 8fd8e0d4c75e..52a79d48752c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2118,7 +2118,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, goto out; if (blocks == 1 && !dx_fallback && - EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { + ext4_has_feature_dir_index(sb)) { retval = make_indexed_dir(handle, &fname, dentry, inode, bh); bh = NULL; /* make_indexed_dir releases bh */ @@ -2388,8 +2388,7 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode) /* limit is 16-bit i_links_count */ if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) { set_nlink(inode, 1); - EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_DIR_NLINK); + ext4_set_feature_dir_nlink(inode->i_sb); } } } @@ -3352,8 +3351,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, if (retval) return retval; ent->de->inode = cpu_to_le32(ino); - if (EXT4_HAS_INCOMPAT_FEATURE(ent->dir->i_sb, - EXT4_FEATURE_INCOMPAT_FILETYPE)) + if (ext4_has_feature_filetype(ent->dir->i_sb)) ent->de->file_type = file_type; ent->dir->i_version++; ent->dir->i_ctime = ent->dir->i_mtime = diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index cf0c472047e3..469eeccee0bf 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -490,7 +490,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb, group_data[0].group != sbi->s_groups_count); reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); - meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + meta_bg = ext4_has_feature_meta_bg(sb); /* This transaction may be extended/restarted along the way */ handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA); @@ -680,8 +680,7 @@ static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, int mult = 3; unsigned ret; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { + if (!ext4_has_feature_sparse_super(sb)) { ret = *min; *min += 1; return ret; @@ -1158,7 +1157,7 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, int i, gdb_off, gdb_num, err = 0; int meta_bg; - meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + meta_bg = ext4_has_feature_meta_bg(sb); for (i = 0; i < count; i++, group++) { int reserved_gdb = ext4_bg_has_super(sb, group) ? le16_to_cpu(es->s_reserved_gdt_blocks) : 0; @@ -1381,9 +1380,7 @@ static void ext4_update_super(struct super_block *sb, ext4_debug("free blocks count %llu", percpu_counter_read(&sbi->s_freeclusters_counter)); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_FLEX_BG) && - sbi->s_log_groups_per_flex) { + if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) { ext4_group_t flex_group; flex_group = ext4_flex_group(sbi, group_data[0].group); atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), @@ -1476,8 +1473,7 @@ exit_journal: int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int gdb_num_end = ((group + flex_gd->count - 1) / EXT4_DESC_PER_BLOCK(sb)); - int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_META_BG); + int meta_bg = ext4_has_feature_meta_bg(sb); sector_t old_gdb = 0; update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, @@ -1585,8 +1581,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); - if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { + if (gdb_off == 0 && !ext4_has_feature_sparse_super(sb)) { ext4_warning(sb, "Can't resize non-sparse filesystem further"); return -EPERM; } @@ -1604,9 +1599,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) } if (reserved_gdb || gdb_off == 0) { - if (!EXT4_HAS_COMPAT_FEATURE(sb, - EXT4_FEATURE_COMPAT_RESIZE_INODE) - || !le16_to_cpu(es->s_reserved_gdt_blocks)) { + if (ext4_has_feature_resize_inode(sb) || + !le16_to_cpu(es->s_reserved_gdt_blocks)) { ext4_warning(sb, "No reserved GDT blocks, can't resize"); return -EPERM; @@ -1825,8 +1819,8 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) if (err) goto errout; - EXT4_CLEAR_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE); - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + ext4_clear_feature_resize_inode(sb); + ext4_set_feature_meta_bg(sb); sbi->s_es->s_first_meta_bg = cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count)); @@ -1918,9 +1912,9 @@ retry: n_desc_blocks = num_desc_blocks(sb, n_group + 1); o_desc_blocks = num_desc_blocks(sb, sbi->s_groups_count); - meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + meta_bg = ext4_has_feature_meta_bg(sb); - if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) { + if (ext4_has_feature_resize_inode(sb)) { if (meta_bg) { ext4_error(sb, "resize_inode and meta_bg enabled " "simultaneously"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f7519ad25cce..49fa4d48b6a4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -110,8 +110,7 @@ MODULE_ALIAS("ext3"); static int ext4_verify_csum_type(struct super_block *sb, struct ext4_super_block *es) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_feature_metadata_csum(sb)) return 1; return es->s_checksum_type == EXT4_CRC32C_CHKSUM; @@ -810,7 +809,7 @@ static void ext4_put_super(struct super_block *sb) ext4_xattr_put_super(sb); if (!(sb->s_flags & MS_RDONLY)) { - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_clear_feature_journal_needs_recovery(sb); es->s_state = cpu_to_le16(sbi->s_mount_state); } if (!(sb->s_flags & MS_RDONLY)) @@ -1284,7 +1283,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) "quota options when quota turned on"); return -1; } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { + if (ext4_has_feature_quota(sb)) { ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " "when QUOTA feature is enabled"); return -1; @@ -1643,8 +1642,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, "quota options when quota turned on"); return -1; } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_QUOTA)) { + if (ext4_has_feature_quota(sb)) { ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " "when QUOTA feature is enabled"); @@ -1703,7 +1701,7 @@ static int parse_options(char *options, struct super_block *sb, return 0; } #ifdef CONFIG_QUOTA - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && + if (ext4_has_feature_quota(sb) && (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) { ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA " "feature is enabled"); @@ -1927,7 +1925,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, es->s_mtime = cpu_to_le32(get_seconds()); ext4_update_dynamic_rev(sb); if (sbi->s_journal) - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_set_feature_journal_needs_recovery(sb); ext4_commit_super(sb, 1); done: @@ -2010,12 +2008,13 @@ failed: return 0; } -static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, +static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group, struct ext4_group_desc *gdp) { int offset; __u16 crc = 0; __le32 le_group = cpu_to_le32(block_group); + struct ext4_sb_info *sbi = EXT4_SB(sb); if (ext4_has_metadata_csum(sbi->s_sb)) { /* Use new metadata_csum algorithm */ @@ -2035,8 +2034,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, } /* old crc16 code */ - if (!(sbi->s_es->s_feature_ro_compat & - cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM))) + if (!ext4_has_feature_gdt_csum(sb)) return 0; offset = offsetof(struct ext4_group_desc, bg_checksum); @@ -2046,8 +2044,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, crc = crc16(crc, (__u8 *)gdp, offset); offset += sizeof(gdp->bg_checksum); /* skip checksum */ /* for checksum of struct ext4_group_desc do the rest...*/ - if ((sbi->s_es->s_feature_incompat & - cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && + if (ext4_has_feature_64bit(sb) && offset < le16_to_cpu(sbi->s_es->s_desc_size)) crc = crc16(crc, (__u8 *)gdp + offset, le16_to_cpu(sbi->s_es->s_desc_size) - @@ -2061,8 +2058,7 @@ int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group, struct ext4_group_desc *gdp) { if (ext4_has_group_desc_csum(sb) && - (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb), - block_group, gdp))) + (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp))) return 0; return 1; @@ -2073,7 +2069,7 @@ void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group, { if (!ext4_has_group_desc_csum(sb)) return; - gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp); + gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp); } /* Called at mount-time, super-block is locked */ @@ -2089,7 +2085,7 @@ static int ext4_check_descriptors(struct super_block *sb, int flexbg_flag = 0; ext4_group_t i, grp = sbi->s_groups_count; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) + if (ext4_has_feature_flex_bg(sb)) flexbg_flag = 1; ext4_debug("Checking group descriptors"); @@ -2133,7 +2129,7 @@ static int ext4_check_descriptors(struct super_block *sb, if (!ext4_group_desc_csum_verify(sb, i, gdp)) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Checksum for group %u failed (%u!=%u)", - i, le16_to_cpu(ext4_group_desc_csum(sbi, i, + i, le16_to_cpu(ext4_group_desc_csum(sb, i, gdp)), le16_to_cpu(gdp->bg_checksum)); if (!(sb->s_flags & MS_RDONLY)) { ext4_unlock_group(sb, i); @@ -2396,8 +2392,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || - nr < first_meta_bg) + if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg) return logical_sb_block + nr + 1; bg = sbi->s_desc_per_block * nr; if (ext4_bg_has_super(sb, bg)) @@ -2461,7 +2456,7 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) */ static int ext4_feature_set_ok(struct super_block *sb, int readonly) { - if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { + if (ext4_has_unknown_ext4_incompat_features(sb)) { ext4_msg(sb, KERN_ERR, "Couldn't mount because of " "unsupported optional features (%x)", @@ -2473,14 +2468,14 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) if (readonly) return 1; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_READONLY)) { + if (ext4_has_feature_readonly(sb)) { ext4_msg(sb, KERN_INFO, "filesystem is read-only"); sb->s_flags |= MS_RDONLY; return 1; } /* Check that feature set is OK for a read-write mount */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { + if (ext4_has_unknown_ext4_ro_compat_features(sb)) { ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " "unsupported optional features (%x)", (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & @@ -2491,7 +2486,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) * Large file size enabled file system can only be mounted * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { + if (ext4_has_feature_huge_file(sb)) { if (sizeof(blkcnt_t) < sizeof(u64)) { ext4_msg(sb, KERN_ERR, "Filesystem with huge files " "cannot be mounted RDWR without " @@ -2499,8 +2494,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) && - !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) { ext4_msg(sb, KERN_ERR, "Can't support bigalloc feature without " "extents feature\n"); @@ -2508,8 +2502,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) } #ifndef CONFIG_QUOTA - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && - !readonly) { + if (ext4_has_feature_quota(sb) && !readonly) { ext4_msg(sb, KERN_ERR, "Filesystem with quota feature cannot be mounted RDWR " "without CONFIG_QUOTA"); @@ -2966,7 +2959,7 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, ext4_group_t i, ngroups = ext4_get_groups_count(sb); int s, j, count = 0; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) + if (!ext4_has_feature_bigalloc(sb)) return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + sbi->s_itb_per_group + 2); @@ -3068,7 +3061,7 @@ static void ext4_set_resv_clusters(struct super_block *sb) * hole punching doesn't need new metadata... This is needed especially * to keep ext2/3 backward compatibility. */ - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) + if (!ext4_has_feature_extents(sb)) return; /* * By default we reserve 2% or 4096 clusters, whichever is smaller. @@ -3167,9 +3160,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); /* Warn if metadata_csum and gdt_csum are both set. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && - EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) + if (ext4_has_feature_metadata_csum(sb) && + ext4_has_feature_gdt_csum(sb)) ext4_warning(sb, "metadata_csum and uninit_bg are " "redundant flags; please run fsck."); @@ -3182,8 +3174,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } /* Load the checksum driver */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + if (ext4_has_feature_metadata_csum(sb)) { sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(sbi->s_chksum_driver)) { ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); @@ -3203,7 +3194,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } /* Precompute checksum seed for all metadata */ - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_CSUM_SEED)) + if (ext4_has_feature_csum_seed(sb)) sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); else if (ext4_has_metadata_csum(sb)) sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, @@ -3308,17 +3299,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && - (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || - EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || - EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) + (ext4_has_compat_features(sb) || + ext4_has_ro_compat_features(sb) || + ext4_has_incompat_features(sb))) ext4_msg(sb, KERN_WARNING, "feature flags set on rev 0 fs, " "running e2fsck is recommended"); if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { set_opt2(sb, HURD_COMPAT); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_64BIT)) { + if (ext4_has_feature_64bit(sb)) { ext4_msg(sb, KERN_ERR, "The Hurd can't support 64-bit file systems"); goto failed_mount; @@ -3376,8 +3366,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT) && - es->s_encryption_level) { + if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) { ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d", es->s_encryption_level); goto failed_mount; @@ -3409,8 +3398,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } } - has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_HUGE_FILE); + has_huge_files = ext4_has_feature_huge_file(sb); sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, has_huge_files); sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); @@ -3434,7 +3422,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } sbi->s_desc_size = le16_to_cpu(es->s_desc_size); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { + if (ext4_has_feature_64bit(sb)) { if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || sbi->s_desc_size > EXT4_MAX_DESC_SIZE || !is_power_of_2(sbi->s_desc_size)) { @@ -3465,7 +3453,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; - if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { + if (ext4_has_feature_dir_index(sb)) { i = le32_to_cpu(es->s_flags); if (i & EXT2_FLAGS_UNSIGNED_HASH) sbi->s_hash_unsigned = 3; @@ -3485,8 +3473,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Handle clustersize */ clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); - has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC); + has_bigalloc = ext4_has_feature_bigalloc(sb); if (has_bigalloc) { if (clustersize < blocksize) { ext4_msg(sb, KERN_ERR, @@ -3645,7 +3632,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_xattr = ext4_xattr_handlers; #ifdef CONFIG_QUOTA sb->dq_op = &ext4_quota_operations; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) + if (ext4_has_feature_quota(sb)) sb->s_qcop = &dquot_quotactl_sysfile_ops; else sb->s_qcop = &ext4_qctl_operations; @@ -3659,11 +3646,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_root = NULL; needs_recovery = (es->s_last_orphan != 0 || - EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_RECOVER)); + ext4_has_feature_journal_needs_recovery(sb)); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && - !(sb->s_flags & MS_RDONLY)) + if (ext4_has_feature_mmp(sb) && !(sb->s_flags & MS_RDONLY)) if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) goto failed_mount3a; @@ -3671,12 +3656,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * The first inode we look at is the journal inode. Don't try * root first: it may be modified in the journal! */ - if (!test_opt(sb, NOLOAD) && - EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { + if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) { if (ext4_load_journal(sb, es, journal_devnum)) goto failed_mount3a; } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && - EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { + ext4_has_feature_journal_needs_recovery(sb)) { ext4_msg(sb, KERN_ERR, "required journal recovery " "suppressed and not mounted read-only"); goto failed_mount_wq; @@ -3687,7 +3671,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto no_journal; } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) && + if (ext4_has_feature_64bit(sb) && !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_64BIT)) { ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); @@ -3739,18 +3723,16 @@ no_journal: } } - if ((DUMMY_ENCRYPTION_ENABLED(sbi) || - EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) && + if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) && (blocksize != PAGE_CACHE_SIZE)) { ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs encryption"); goto failed_mount_wq; } - if (DUMMY_ENCRYPTION_ENABLED(sbi) && - !(sb->s_flags & MS_RDONLY) && - !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) { - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT); + if (DUMMY_ENCRYPTION_ENABLED(sbi) && !(sb->s_flags & MS_RDONLY) && + !ext4_has_feature_encrypt(sb)) { + ext4_set_feature_encrypt(sb); ext4_commit_super(sb, 1); } @@ -3809,8 +3791,7 @@ no_journal: if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { sbi->s_want_extra_isize = sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { + if (ext4_has_feature_extra_isize(sb)) { if (sbi->s_want_extra_isize < le16_to_cpu(es->s_want_extra_isize)) sbi->s_want_extra_isize = @@ -3869,7 +3850,7 @@ no_journal: goto failed_mount6; } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) + if (ext4_has_feature_flex_bg(sb)) if (!ext4_fill_flex_info(sb)) { ext4_msg(sb, KERN_ERR, "unable to initialize " @@ -3887,8 +3868,7 @@ no_journal: #ifdef CONFIG_QUOTA /* Enable quota usage during mount. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && - !(sb->s_flags & MS_RDONLY)) { + if (ext4_has_feature_quota(sb) && !(sb->s_flags & MS_RDONLY)) { err = ext4_enable_quotas(sb); if (err) goto failed_mount8; @@ -4029,7 +4009,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, struct inode *journal_inode; journal_t *journal; - BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + BUG_ON(!ext4_has_feature_journal(sb)); /* First, test for the existence of a valid inode on disk. Bad * things happen if we iget() an unused inode, as the subsequent @@ -4079,7 +4059,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, struct ext4_super_block *es; struct block_device *bdev; - BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + BUG_ON(!ext4_has_feature_journal(sb)); bdev = ext4_blkdev_get(j_dev, sb); if (bdev == NULL) @@ -4171,7 +4151,7 @@ static int ext4_load_journal(struct super_block *sb, int err = 0; int really_read_only; - BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + BUG_ON(!ext4_has_feature_journal(sb)); if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { @@ -4188,7 +4168,7 @@ static int ext4_load_journal(struct super_block *sb, * crash? For recovery, we need to check in advance whether we * can get read-write access to the device. */ - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { + if (ext4_has_feature_journal_needs_recovery(sb)) { if (sb->s_flags & MS_RDONLY) { ext4_msg(sb, KERN_INFO, "INFO: recovery " "required on readonly filesystem"); @@ -4219,7 +4199,7 @@ static int ext4_load_journal(struct super_block *sb, if (!(journal->j_flags & JBD2_BARRIER)) ext4_msg(sb, KERN_INFO, "barriers disabled"); - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) + if (!ext4_has_feature_journal_needs_recovery(sb)) err = jbd2_journal_wipe(journal, !really_read_only); if (!err) { char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); @@ -4333,7 +4313,7 @@ static void ext4_mark_recovery_complete(struct super_block *sb, { journal_t *journal = EXT4_SB(sb)->s_journal; - if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { + if (!ext4_has_feature_journal(sb)) { BUG_ON(journal != NULL); return; } @@ -4341,9 +4321,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb, if (jbd2_journal_flush(journal) < 0) goto out; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && + if (ext4_has_feature_journal_needs_recovery(sb) && sb->s_flags & MS_RDONLY) { - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_clear_feature_journal_needs_recovery(sb); ext4_commit_super(sb, 1); } @@ -4363,7 +4343,7 @@ static void ext4_clear_journal_err(struct super_block *sb, int j_errno; const char *errstr; - BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + BUG_ON(!ext4_has_feature_journal(sb)); journal = EXT4_SB(sb)->s_journal; @@ -4478,7 +4458,7 @@ static int ext4_freeze(struct super_block *sb) goto out; /* Journal blocked and flushed, clear needs_recovery flag. */ - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_clear_feature_journal_needs_recovery(sb); } error = ext4_commit_super(sb, 1); @@ -4500,7 +4480,7 @@ static int ext4_unfreeze(struct super_block *sb) if (EXT4_SB(sb)->s_journal) { /* Reset the needs_recovery flag before the fs is unlocked. */ - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_set_feature_journal_needs_recovery(sb); } ext4_commit_super(sb, 1); @@ -4653,8 +4633,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ext4_mark_recovery_complete(sb, es); } else { /* Make sure we can mount this feature set readwrite */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_READONLY) || + if (ext4_has_feature_readonly(sb) || !ext4_feature_set_ok(sb, 0)) { err = -EROFS; goto restore_opts; @@ -4670,7 +4649,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (!ext4_group_desc_csum_verify(sb, g, gdp)) { ext4_msg(sb, KERN_ERR, "ext4_remount: Checksum for group %u failed (%u!=%u)", - g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), + g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)), le16_to_cpu(gdp->bg_checksum)); err = -EFSBADCRC; goto restore_opts; @@ -4702,8 +4681,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) sbi->s_mount_state = le16_to_cpu(es->s_state); if (!ext4_setup_super(sb, es, 0)) sb->s_flags &= ~MS_RDONLY; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_MMP)) + if (ext4_has_feature_mmp(sb)) if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) { err = -EROFS; @@ -4736,8 +4714,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (enable_quota) { if (sb_any_quota_suspended(sb)) dquot_resume(sb, -1); - else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_QUOTA)) { + else if (ext4_has_feature_quota(sb)) { err = ext4_enable_quotas(sb); if (err) goto restore_opts; @@ -4881,7 +4858,7 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot) struct ext4_sb_info *sbi = EXT4_SB(sb); /* Are we journaling quotas? */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) || + if (ext4_has_feature_quota(sb) || sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); return ext4_write_dquot(dquot); @@ -4969,7 +4946,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) }; - BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)); + BUG_ON(!ext4_has_feature_quota(sb)); if (!qf_inums[type]) return -EPERM; @@ -5163,11 +5140,11 @@ static inline void unregister_as_ext2(void) static inline int ext2_feature_set_ok(struct super_block *sb) { - if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) + if (ext4_has_unknown_ext2_incompat_features(sb)) return 0; if (sb->s_flags & MS_RDONLY) return 1; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) + if (ext4_has_unknown_ext2_ro_compat_features(sb)) return 0; return 1; } @@ -5192,13 +5169,13 @@ static inline void unregister_as_ext3(void) static inline int ext3_feature_set_ok(struct super_block *sb) { - if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)) + if (ext4_has_unknown_ext3_incompat_features(sb)) return 0; - if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) + if (!ext4_has_feature_journal(sb)) return 0; if (sb->s_flags & MS_RDONLY) return 1; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) + if (ext4_has_unknown_ext3_ro_compat_features(sb)) return 0; return 1; } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 7649422ed7a1..984448c6f5f0 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -525,12 +525,12 @@ errout: static void ext4_xattr_update_super_block(handle_t *handle, struct super_block *sb) { - if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR)) + if (ext4_has_feature_xattr(sb)) return; BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { - EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); + ext4_set_feature_xattr(sb); ext4_handle_dirty_super(handle, sb); } } From 56316a0d28f251dae6a3bc2b6d50e7c25389871f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 17 Oct 2015 16:18:45 -0400 Subject: [PATCH 16/26] jbd2: clean up feature test macros with predicate functions Create separate predicate functions to test/set/clear feature flags, thereby replacing the wordy old macros. Furthermore, clean out the places where we open-coded feature tests. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 22 +++++--------- fs/jbd2/journal.c | 12 ++++---- fs/jbd2/recovery.c | 18 +++++------ fs/jbd2/revoke.c | 4 +-- include/linux/jbd2.h | 71 ++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 91 insertions(+), 36 deletions(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 362e5f614450..36345fefa3ff 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -142,8 +142,7 @@ static int journal_submit_commit_record(journal_t *journal, tmp->h_commit_sec = cpu_to_be64(now.tv_sec); tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec); - if (JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_COMPAT_CHECKSUM)) { + if (jbd2_has_feature_checksum(journal)) { tmp->h_chksum_type = JBD2_CRC32_CHKSUM; tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; tmp->h_chksum[0] = cpu_to_be32(crc32_sum); @@ -157,8 +156,7 @@ static int journal_submit_commit_record(journal_t *journal, bh->b_end_io = journal_end_buffer_io_sync; if (journal->j_flags & JBD2_BARRIER && - !JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) + !jbd2_has_feature_async_commit(journal)) ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh); else ret = submit_bh(WRITE_SYNC, bh); @@ -317,7 +315,7 @@ static void write_tag_block(journal_t *j, journal_block_tag_t *tag, unsigned long long block) { tag->t_blocknr = cpu_to_be32(block & (u32)~0); - if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT)) + if (jbd2_has_feature_64bit(j)) tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); } @@ -356,7 +354,7 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, bh->b_size); kunmap_atomic(addr); - if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + if (jbd2_has_feature_csum3(j)) tag3->t_checksum = cpu_to_be32(csum32); else tag->t_checksum = cpu_to_be16(csum32); @@ -730,8 +728,7 @@ start_journal_io: /* * Compute checksum. */ - if (JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_COMPAT_CHECKSUM)) { + if (jbd2_has_feature_checksum(journal)) { crc32_sum = jbd2_checksum_data(crc32_sum, bh); } @@ -797,8 +794,7 @@ start_journal_io: blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL); /* Done it all: now write the commit record asynchronously. */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { + if (jbd2_has_feature_async_commit(journal)) { err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) @@ -889,8 +885,7 @@ start_journal_io: commit_transaction->t_state = T_COMMIT_JFLUSH; write_unlock(&journal->j_state_lock); - if (!JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { + if (!jbd2_has_feature_async_commit(journal)) { err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) @@ -898,8 +893,7 @@ start_journal_io: } if (cbh) err = journal_wait_on_commit_record(journal, cbh); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && + if (jbd2_has_feature_async_commit(journal) && journal->j_flags & JBD2_BARRIER) { blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL); } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 474c1781c0ce..eb8a7546e8a4 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1523,8 +1523,8 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && - JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { + if (jbd2_has_feature_csum2(journal) && + jbd2_has_feature_csum3(journal)) { /* Can't have checksum v2 and v3 at the same time! */ printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " "at the same time!\n"); @@ -1532,7 +1532,7 @@ static int journal_get_superblock(journal_t *journal) } if (jbd2_journal_has_csum_v2or3_feature(journal) && - JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { + jbd2_has_feature_checksum(journal)) { /* Can't have checksum v1 and v2 on at the same time! */ printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " "at the same time!\n"); @@ -2198,15 +2198,15 @@ size_t journal_tag_bytes(journal_t *journal) { size_t sz; - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + if (jbd2_has_feature_csum3(journal)) return sizeof(journal_block_tag3_t); sz = sizeof(journal_block_tag_t); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_has_feature_csum2(journal)) sz += sizeof(__u16); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + if (jbd2_has_feature_64bit(journal)) return sz; else return sz - sizeof(__u32); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 5c836d78af3b..7f277e49fe88 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -342,7 +342,7 @@ static inline unsigned long long read_tag_block(journal_t *journal, journal_block_tag_t *tag) { unsigned long long block = be32_to_cpu(tag->t_blocknr); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + if (jbd2_has_feature_64bit(journal)) block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; return block; } @@ -411,7 +411,7 @@ static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); - if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + if (jbd2_has_feature_csum3(j)) return tag3->t_checksum == cpu_to_be32(csum32); else return tag->t_checksum == cpu_to_be16(csum32); @@ -538,8 +538,7 @@ static int do_one_pass(journal_t *journal, * just skip over the blocks it describes. */ if (pass != PASS_REPLAY) { if (pass == PASS_SCAN && - JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_COMPAT_CHECKSUM) && + jbd2_has_feature_checksum(journal) && !info->end_transaction) { if (calc_chksums(journal, bh, &next_log_block, @@ -694,8 +693,7 @@ static int do_one_pass(journal_t *journal, * much to do other than move on to the next sequence * number. */ if (pass == PASS_SCAN && - JBD2_HAS_COMPAT_FEATURE(journal, - JBD2_FEATURE_COMPAT_CHECKSUM)) { + jbd2_has_feature_checksum(journal)) { int chksum_err, chksum_seen; struct commit_header *cbh = (struct commit_header *)bh->b_data; @@ -735,8 +733,7 @@ static int do_one_pass(journal_t *journal, if (chksum_err) { info->end_transaction = next_commit_ID; - if (!JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ + if (!jbd2_has_feature_async_commit(journal)) { journal->j_failed_commit = next_commit_ID; brelse(bh); @@ -750,8 +747,7 @@ static int do_one_pass(journal_t *journal, bh->b_data)) { info->end_transaction = next_commit_ID; - if (!JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { + if (!jbd2_has_feature_async_commit(journal)) { journal->j_failed_commit = next_commit_ID; brelse(bh); @@ -859,7 +855,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, return -EINVAL; max = rcount; - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + if (jbd2_has_feature_64bit(journal)) record_len = 8; while (offset + record_len <= max) { diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 0abf2e7f725b..705ae577882b 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -589,7 +589,7 @@ static void write_one_revoke_record(journal_t *journal, if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_revoke_tail); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + if (jbd2_has_feature_64bit(journal)) sz = 8; else sz = 4; @@ -619,7 +619,7 @@ static void write_one_revoke_record(journal_t *journal, *descriptorp = descriptor; } - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + if (jbd2_has_feature_64bit(journal)) * ((__be64 *)(&descriptor->b_data[offset])) = cpu_to_be64(record->blocknr); else diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f2a4b07a6ce6..b5ca2314cace 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -278,6 +278,7 @@ typedef struct journal_superblock_s /* 0x0400 */ } journal_superblock_t; +/* Use the jbd2_{has,set,clear}_feature_* helpers; these will be removed */ #define JBD2_HAS_COMPAT_FEATURE(j,mask) \ ((j)->j_format_version >= 2 && \ ((j)->j_superblock->s_feature_compat & cpu_to_be32((mask)))) @@ -288,7 +289,7 @@ typedef struct journal_superblock_s ((j)->j_format_version >= 2 && \ ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) -#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001 +#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001 #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 @@ -296,6 +297,8 @@ typedef struct journal_superblock_s #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 #define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010 +/* See "journal feature predicate functions" below */ + /* Features known to this kernel version: */ #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 @@ -1034,6 +1037,69 @@ struct journal_s __u32 j_csum_seed; }; +/* journal feature predicate functions */ +#define JBD2_FEATURE_COMPAT_FUNCS(name, flagname) \ +static inline bool jbd2_has_feature_##name(journal_t *j) \ +{ \ + return ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_compat & \ + cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname)) != 0); \ +} \ +static inline void jbd2_set_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_compat |= \ + cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname); \ +} \ +static inline void jbd2_clear_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_compat &= \ + ~cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname); \ +} + +#define JBD2_FEATURE_RO_COMPAT_FUNCS(name, flagname) \ +static inline bool jbd2_has_feature_##name(journal_t *j) \ +{ \ + return ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_ro_compat & \ + cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname)) != 0); \ +} \ +static inline void jbd2_set_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_ro_compat |= \ + cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname); \ +} \ +static inline void jbd2_clear_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_ro_compat &= \ + ~cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname); \ +} + +#define JBD2_FEATURE_INCOMPAT_FUNCS(name, flagname) \ +static inline bool jbd2_has_feature_##name(journal_t *j) \ +{ \ + return ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_incompat & \ + cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname)) != 0); \ +} \ +static inline void jbd2_set_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_incompat |= \ + cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname); \ +} \ +static inline void jbd2_clear_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_incompat &= \ + ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname); \ +} + +JBD2_FEATURE_COMPAT_FUNCS(checksum, CHECKSUM) + +JBD2_FEATURE_INCOMPAT_FUNCS(revoke, REVOKE) +JBD2_FEATURE_INCOMPAT_FUNCS(64bit, 64BIT) +JBD2_FEATURE_INCOMPAT_FUNCS(async_commit, ASYNC_COMMIT) +JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) +JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) + /* * Journal flag definitions */ @@ -1340,8 +1406,7 @@ extern size_t journal_tag_bytes(journal_t *journal); static inline bool jbd2_journal_has_csum_v2or3_feature(journal_t *j) { - return JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2) || - JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3); + return jbd2_has_feature_csum2(j) || jbd2_has_feature_csum3(j); } static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) From 9008a58e5dcee014f5de69d154e2620870f9224e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 17 Oct 2015 21:33:24 -0400 Subject: [PATCH 17/26] ext4: make the bitmap read routines return real error codes Make the bitmap reaading routines return real error codes (EIO, EFSCORRUPTED, EFSBADCRC) which can then be reflected back to userspace for more precise diagnosis work. In particular, this means that mballoc no longer claims that we're out of memory if the block bitmaps become corrupt. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- fs/ext4/balloc.c | 78 ++++++++++++++++++--------------- fs/ext4/ialloc.c | 108 ++++++++++++++++++++++++++++++---------------- fs/ext4/mballoc.c | 46 +++++++++++++------- 3 files changed, 145 insertions(+), 87 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 32184c16af16..ec0668a60678 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -191,6 +191,7 @@ static int ext4_init_block_bitmap(struct super_block *sb, /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { + ext4_error(sb, "Checksum bad for group %u", block_group); grp = ext4_get_group_info(sb, block_group); if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) percpu_counter_sub(&sbi->s_freeclusters_counter, @@ -360,19 +361,31 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, return 0; } -static void ext4_validate_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - ext4_group_t block_group, - struct buffer_head *bh) +static int ext4_validate_block_bitmap(struct super_block *sb, + struct ext4_group_desc *desc, + ext4_group_t block_group, + struct buffer_head *bh) { ext4_fsblk_t blk; struct ext4_group_info *grp = ext4_get_group_info(sb, block_group); struct ext4_sb_info *sbi = EXT4_SB(sb); - if (buffer_verified(bh) || EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) - return; + if (buffer_verified(bh)) + return 0; + if (EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) + return -EFSCORRUPTED; ext4_lock_group(sb, block_group); + if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, + desc, bh))) { + ext4_unlock_group(sb, block_group); + ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); + if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) + percpu_counter_sub(&sbi->s_freeclusters_counter, + grp->bb_free); + set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); + return -EFSBADCRC; + } blk = ext4_valid_block_bitmap(sb, desc, block_group, bh); if (unlikely(blk != 0)) { ext4_unlock_group(sb, block_group); @@ -382,20 +395,11 @@ static void ext4_validate_block_bitmap(struct super_block *sb, percpu_counter_sub(&sbi->s_freeclusters_counter, grp->bb_free); set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); - return; - } - if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, - desc, bh))) { - ext4_unlock_group(sb, block_group); - ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); - if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) - percpu_counter_sub(&sbi->s_freeclusters_counter, - grp->bb_free); - set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); - return; + return -EFSCORRUPTED; } set_buffer_verified(bh); ext4_unlock_group(sb, block_group); + return 0; } /** @@ -414,17 +418,18 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) struct ext4_group_desc *desc; struct buffer_head *bh; ext4_fsblk_t bitmap_blk; + int err; desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) - return NULL; + return ERR_PTR(-EFSCORRUPTED); bitmap_blk = ext4_block_bitmap(sb, desc); bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { ext4_error(sb, "Cannot get buffer for block bitmap - " "block_group = %u, block_bitmap = %llu", block_group, bitmap_blk); - return NULL; + return ERR_PTR(-ENOMEM); } if (bitmap_uptodate(bh)) @@ -437,7 +442,6 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) } ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { - int err; err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); @@ -445,7 +449,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) ext4_unlock_group(sb, block_group); unlock_buffer(bh); if (err) - ext4_error(sb, "Checksum bad for grp %u", block_group); + goto out; goto verify; } ext4_unlock_group(sb, block_group); @@ -468,11 +472,13 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) submit_bh(READ | REQ_META | REQ_PRIO, bh); return bh; verify: - ext4_validate_block_bitmap(sb, desc, block_group, bh); - if (buffer_verified(bh)) - return bh; + err = ext4_validate_block_bitmap(sb, desc, block_group, bh); + if (err) + goto out; + return bh; +out: put_bh(bh); - return NULL; + return ERR_PTR(err); } /* Returns 0 on success, 1 on error */ @@ -485,32 +491,32 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, return 0; desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) - return 1; + return -EFSCORRUPTED; wait_on_buffer(bh); if (!buffer_uptodate(bh)) { ext4_error(sb, "Cannot read block bitmap - " "block_group = %u, block_bitmap = %llu", block_group, (unsigned long long) bh->b_blocknr); - return 1; + return -EIO; } clear_buffer_new(bh); /* Panic or remount fs read-only if block bitmap is invalid */ - ext4_validate_block_bitmap(sb, desc, block_group, bh); - /* ...but check for error just in case errors=continue. */ - return !buffer_verified(bh); + return ext4_validate_block_bitmap(sb, desc, block_group, bh); } struct buffer_head * ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) { struct buffer_head *bh; + int err; bh = ext4_read_block_bitmap_nowait(sb, block_group); - if (!bh) - return NULL; - if (ext4_wait_block_bitmap(sb, block_group, bh)) { + if (IS_ERR(bh)) + return bh; + err = ext4_wait_block_bitmap(sb, block_group, bh); + if (err) { put_bh(bh); - return NULL; + return ERR_PTR(err); } return bh; } @@ -681,8 +687,10 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) desc_count += ext4_free_group_clusters(sb, gdp); brelse(bitmap_bh); bitmap_bh = ext4_read_block_bitmap(sb, i); - if (bitmap_bh == NULL) + if (IS_ERR(bitmap_bh)) { + bitmap_bh = NULL; continue; + } x = ext4_count_free(bitmap_bh->b_data, EXT4_CLUSTERS_PER_GROUP(sb) / 8); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 0d7f06ed7ee2..1b8024d26f65 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -64,7 +64,7 @@ void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) } /* Initializes an uninitialized inode bitmap */ -static unsigned ext4_init_inode_bitmap(struct super_block *sb, +static int ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, ext4_group_t block_group, struct ext4_group_desc *gdp) @@ -89,7 +89,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, count); } set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); - return 0; + return -EFSBADCRC; } memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); @@ -99,7 +99,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, EXT4_INODES_PER_GROUP(sb) / 8); ext4_group_desc_csum_set(sb, block_group, gdp); - return EXT4_INODES_PER_GROUP(sb); + return 0; } void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) @@ -112,6 +112,42 @@ void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) put_bh(bh); } +static int ext4_validate_inode_bitmap(struct super_block *sb, + struct ext4_group_desc *desc, + ext4_group_t block_group, + struct buffer_head *bh) +{ + ext4_fsblk_t blk; + struct ext4_group_info *grp = ext4_get_group_info(sb, block_group); + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (buffer_verified(bh)) + return 0; + if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) + return -EFSCORRUPTED; + + ext4_lock_group(sb, block_group); + blk = ext4_inode_bitmap(sb, desc); + if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, + EXT4_INODES_PER_GROUP(sb) / 8)) { + ext4_unlock_group(sb, block_group); + ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " + "inode_bitmap = %llu", block_group, blk); + grp = ext4_get_group_info(sb, block_group); + if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { + int count; + count = ext4_free_inodes_count(sb, desc); + percpu_counter_sub(&sbi->s_freeinodes_counter, + count); + } + set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); + return -EFSBADCRC; + } + set_buffer_verified(bh); + ext4_unlock_group(sb, block_group); + return 0; +} + /* * Read the inode allocation bitmap for a given block_group, reading * into the specified slot in the superblock's bitmap cache. @@ -124,12 +160,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) struct ext4_group_desc *desc; struct buffer_head *bh = NULL; ext4_fsblk_t bitmap_blk; - struct ext4_group_info *grp; - struct ext4_sb_info *sbi = EXT4_SB(sb); + int err; desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) - return NULL; + return ERR_PTR(-EFSCORRUPTED); bitmap_blk = ext4_inode_bitmap(sb, desc); bh = sb_getblk(sb, bitmap_blk); @@ -137,7 +172,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_error(sb, "Cannot read inode bitmap - " "block_group = %u, inode_bitmap = %llu", block_group, bitmap_blk); - return NULL; + return ERR_PTR(-EIO); } if (bitmap_uptodate(bh)) goto verify; @@ -150,12 +185,14 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - ext4_init_inode_bitmap(sb, bh, block_group, desc); + err = ext4_init_inode_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); + if (err) + goto out; return bh; } ext4_unlock_group(sb, block_group); @@ -182,31 +219,17 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_error(sb, "Cannot read inode bitmap - " "block_group = %u, inode_bitmap = %llu", block_group, bitmap_blk); - return NULL; + return ERR_PTR(-EIO); } verify: - ext4_lock_group(sb, block_group); - if (!buffer_verified(bh) && - !ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, - EXT4_INODES_PER_GROUP(sb) / 8)) { - ext4_unlock_group(sb, block_group); - put_bh(bh); - ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " - "inode_bitmap = %llu", block_group, bitmap_blk); - grp = ext4_get_group_info(sb, block_group); - if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { - int count; - count = ext4_free_inodes_count(sb, desc); - percpu_counter_sub(&sbi->s_freeinodes_counter, - count); - } - set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); - return NULL; - } - ext4_unlock_group(sb, block_group); - set_buffer_verified(bh); + err = ext4_validate_inode_bitmap(sb, desc, block_group, bh); + if (err) + goto out; return bh; +out: + put_bh(bh); + return ERR_PTR(err); } /* @@ -286,8 +309,15 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) bitmap_bh = ext4_read_inode_bitmap(sb, block_group); /* Don't bother if the inode bitmap is corrupt. */ grp = ext4_get_group_info(sb, block_group); - if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) || !bitmap_bh) + if (IS_ERR(bitmap_bh)) { + fatal = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; goto error_return; + } + if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) { + fatal = -EFSCORRUPTED; + goto error_return; + } BUFFER_TRACE(bitmap_bh, "get_write_access"); fatal = ext4_journal_get_write_access(handle, bitmap_bh); @@ -826,7 +856,9 @@ got_group: brelse(inode_bitmap_bh); inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); /* Skip groups with suspicious inode tables */ - if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || !inode_bitmap_bh) { + if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || + IS_ERR(inode_bitmap_bh)) { + inode_bitmap_bh = NULL; if (++group == ngroups) group = 0; continue; @@ -902,8 +934,8 @@ got: struct buffer_head *block_bitmap_bh; block_bitmap_bh = ext4_read_block_bitmap(sb, group); - if (!block_bitmap_bh) { - err = -EIO; + if (IS_ERR(block_bitmap_bh)) { + err = PTR_ERR(block_bitmap_bh); goto out; } BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); @@ -1123,8 +1155,10 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); bitmap_bh = ext4_read_inode_bitmap(sb, block_group); - if (!bitmap_bh) { - ext4_warning(sb, "inode bitmap error for orphan %lu", ino); + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + ext4_warning(sb, "inode bitmap error %ld for orphan %lu", + ino, err); goto error; } @@ -1199,8 +1233,10 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) desc_count += ext4_free_inodes_count(sb, gdp); brelse(bitmap_bh); bitmap_bh = ext4_read_inode_bitmap(sb, i); - if (!bitmap_bh) + if (IS_ERR(bitmap_bh)) { + bitmap_bh = NULL; continue; + } x = ext4_count_free(bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb) / 8); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b0f7ee57630b..6794ff47cfaf 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -874,8 +874,10 @@ static int ext4_mb_init_cache(struct page *page, char *incore) bh[i] = NULL; continue; } - if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) { - err = -ENOMEM; + bh[i] = ext4_read_block_bitmap_nowait(sb, group); + if (IS_ERR(bh[i])) { + err = PTR_ERR(bh[i]); + bh[i] = NULL; goto out; } mb_debug(1, "read bitmap for group %u\n", group); @@ -883,8 +885,13 @@ static int ext4_mb_init_cache(struct page *page, char *incore) /* wait for I/O completion */ for (i = 0, group = first_group; i < groups_per_page; i++, group++) { - if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) - err = -EIO; + int err2; + + if (!bh[i]) + continue; + err2 = ext4_wait_block_bitmap(sb, group, bh[i]); + if (!err) + err = err2; } first_block = page->index * blocks_per_page; @@ -2447,7 +2454,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, kmalloc(sb->s_blocksize, GFP_NOFS); BUG_ON(meta_group_info[i]->bb_bitmap == NULL); bh = ext4_read_block_bitmap(sb, group); - BUG_ON(bh == NULL); + BUG_ON(IS_ERR_OR_NULL(bh)); memcpy(meta_group_info[i]->bb_bitmap, bh->b_data, sb->s_blocksize); put_bh(bh); @@ -2889,10 +2896,12 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, sb = ac->ac_sb; sbi = EXT4_SB(sb); - err = -EIO; bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group); - if (!bitmap_bh) + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; goto out_err; + } BUFFER_TRACE(bitmap_bh, "getting write access"); err = ext4_journal_get_write_access(handle, bitmap_bh); @@ -3836,8 +3845,10 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, return 0; bitmap_bh = ext4_read_block_bitmap(sb, group); - if (bitmap_bh == NULL) { - ext4_error(sb, "Error reading block bitmap for %u", group); + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + ext4_error(sb, "Error %d reading block bitmap for %u", + err, group); return 0; } @@ -4008,9 +4019,10 @@ repeat: } bitmap_bh = ext4_read_block_bitmap(sb, group); - if (bitmap_bh == NULL) { - ext4_error(sb, "Error reading block bitmap for %u", - group); + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + ext4_error(sb, "Error %d reading block bitmap for %u", + err, group); ext4_mb_unload_buddy(&e4b); continue; } @@ -4754,8 +4766,9 @@ do_more: } count_clusters = EXT4_NUM_B2C(sbi, count); bitmap_bh = ext4_read_block_bitmap(sb, block_group); - if (!bitmap_bh) { - err = -EIO; + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; goto error_return; } gdp = ext4_get_group_desc(sb, block_group, &gd_bh); @@ -4924,8 +4937,9 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, } bitmap_bh = ext4_read_block_bitmap(sb, block_group); - if (!bitmap_bh) { - err = -EIO; + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; goto error_return; } From 9c02ac97989d544c89f049e8add7149aaea01671 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Sat, 17 Oct 2015 22:28:21 -0400 Subject: [PATCH 18/26] ext4: fix xfstest generic/269 double revoked buffer bug with bigalloc When you repeatly execute xfstest generic/269 with bigalloc_1k option enabled using the below command: "./kvm-xfstests -c bigalloc_1k -m nodelalloc -C 1000 generic/269" you can easily see the below bug message. "JBD2 unexpected failure: jbd2_journal_revoke: !buffer_revoked(bh);" This means that an already revoked buffer is erroneously revoked again and it is caused by doing revoke for the buffer at the wrong position in ext4_free_blocks(). We need to re-position the buffer revoke procedure for an unspecified buffer after checking the cluster boundary for bigalloc option. If not, some part of the cluster can be doubly revoked. Signed-off-by: Daeho Jeong --- fs/ext4/mballoc.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6794ff47cfaf..61eaf74dca37 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4687,22 +4687,11 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, ext4_debug("freeing block %llu\n", block); trace_ext4_free_blocks(inode, block, count, flags); - if (flags & EXT4_FREE_BLOCKS_FORGET) { - struct buffer_head *tbh = bh; - int i; + if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { + BUG_ON(count > 1); - BUG_ON(bh && (count > 1)); - - for (i = 0; i < count; i++) { - cond_resched(); - if (!bh) - tbh = sb_find_get_block(inode->i_sb, - block + i); - if (!tbh) - continue; - ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, - inode, tbh, block + i); - } + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, + inode, bh, block); } /* @@ -4747,6 +4736,19 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, count += sbi->s_cluster_ratio - overflow; } + if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { + int i; + + for (i = 0; i < count; i++) { + cond_resched(); + bh = sb_find_get_block(inode->i_sb, block + i); + if (!bh) + continue; + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, + inode, bh, block + i); + } + } + do_more: overflow = 0; ext4_get_group_no_and_offset(sb, block, &block_group, &bit); From 33d14975e5ac469963d5d63856b61698ad0bff07 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 17 Oct 2015 22:35:09 -0400 Subject: [PATCH 19/26] jbd2: fix checkpoint list cleanup Unlike comments and expectation of callers journal_clean_one_cp_list() returned 1 not only if it freed the transaction but also if it freed some buffers in the transaction. That could make __jbd2_journal_clean_checkpoint_list() skip processing t_checkpoint_io_list and continue with processing the next transaction. This is mostly a cosmetic issue since the only result is we can sometimes free less memory than we could. But it's still worth fixing. Fix journal_clean_one_cp_list() to return 1 only if the transaction was really freed. Fixes: 50849db32a9f529235a84bcc84a6b8e631b1d0ec Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/jbd2/checkpoint.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 8c44654ce274..684996c8a3a4 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -427,7 +427,6 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy) struct journal_head *last_jh; struct journal_head *next_jh = jh; int ret; - int freed = 0; if (!jh) return 0; @@ -441,10 +440,9 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy) else ret = __jbd2_journal_remove_checkpoint(jh) + 1; if (!ret) - return freed; + return 0; if (ret == 2) return 1; - freed = 1; /* * This function only frees up some memory * if possible so we dont have an obligation @@ -452,10 +450,10 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy) * requested: */ if (need_resched()) - return freed; + return 0; } while (jh != last_jh); - return freed; + return 0; } /* From 6934da9238da947628be83635e365df41064b09b Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Sat, 17 Oct 2015 22:57:06 -0400 Subject: [PATCH 20/26] ext4: fix potential use after free in __ext4_journal_stop There is a use-after-free possibility in __ext4_journal_stop() in the case that we free the handle in the first jbd2_journal_stop() because we're referencing handle->h_err afterwards. This was introduced in 9705acd63b125dee8b15c705216d7186daea4625 and it is wrong. Fix it by storing the handle->h_err value beforehand and avoid referencing potentially freed handle. Fixes: 9705acd63b125dee8b15c705216d7186daea4625 Signed-off-by: Lukas Czerner Reviewed-by: Andreas Dilger Cc: stable@vger.kernel.org --- fs/ext4/ext4_jbd2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index d41843181818..e770c1ee4613 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -88,13 +88,13 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) return 0; } + err = handle->h_err; if (!handle->h_transaction) { - err = jbd2_journal_stop(handle); - return handle->h_err ? handle->h_err : err; + rc = jbd2_journal_stop(handle); + return err ? err : rc; } sb = handle->h_transaction->t_journal->j_private; - err = handle->h_err; rc = jbd2_journal_stop(handle); if (!err) From 904dad4742d211b7a8910e92695c0fa957483836 Mon Sep 17 00:00:00 2001 From: Andy Leiserson Date: Sun, 18 Oct 2015 00:36:29 -0400 Subject: [PATCH 21/26] [PATCH] fix calculation of meta_bg descriptor backups "group" is the group where the backup will be placed, and is initialized to zero in the declaration. This meant that backups for meta_bg descriptors were erroneously written to the backup block group descriptors in groups 1 and (desc_per_block-1). Reproduction information: mke2fs -Fq -t ext4 -b 1024 -O ^resize_inode /tmp/foo.img 16G truncate -s 24G /tmp/foo.img losetup /dev/loop0 /tmp/foo.img mount /dev/loop0 /mnt resize2fs /dev/loop0 umount /dev/loop0 dd if=/dev/zero of=/dev/loop0 bs=1024 count=2 e2fsck -fy /dev/loop0 losetup -d /dev/loop0 Signed-off-by: Andy Leiserson Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/resize.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 469eeccee0bf..ad62d7acc315 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1039,7 +1039,7 @@ exit_free: * do not copy the full number of backups at this time. The resize * which changed s_groups_count will backup again. */ -static void update_backups(struct super_block *sb, int blk_off, char *data, +static void update_backups(struct super_block *sb, sector_t blk_off, char *data, int size, int meta_bg) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -1064,7 +1064,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, group = ext4_list_backups(sb, &three, &five, &seven); last = sbi->s_groups_count; } else { - group = ext4_meta_bg_first_group(sb, group) + 1; + group = ext4_get_group_number(sb, blk_off) + 1; last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2); } From 4327ba52afd03fc4b5afa0ee1d774c9c5b0e85c5 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Sun, 18 Oct 2015 17:02:56 -0400 Subject: [PATCH 22/26] ext4, jbd2: ensure entering into panic after recording an error in superblock If a EXT4 filesystem utilizes JBD2 journaling and an error occurs, the journaling will be aborted first and the error number will be recorded into JBD2 superblock and, finally, the system will enter into the panic state in "errors=panic" option. But, in the rare case, this sequence is little twisted like the below figure and it will happen that the system enters into panic state, which means the system reset in mobile environment, before completion of recording an error in the journal superblock. In this case, e2fsck cannot recognize that the filesystem failure occurred in the previous run and the corruption wouldn't be fixed. Task A Task B ext4_handle_error() -> jbd2_journal_abort() -> __journal_abort_soft() -> __jbd2_journal_abort_hard() | -> journal->j_flags |= JBD2_ABORT; | | __ext4_abort() | -> jbd2_journal_abort() | | -> __journal_abort_soft() | | -> if (journal->j_flags & JBD2_ABORT) | | return; | -> panic() | -> jbd2_journal_update_sb_errno() Tested-by: Hobin Woo Signed-off-by: Daeho Jeong Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/super.c | 12 ++++++++++-- fs/jbd2/journal.c | 6 +++++- include/linux/jbd2.h | 1 + 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 49fa4d48b6a4..639613fe20aa 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -388,9 +388,13 @@ static void ext4_handle_error(struct super_block *sb) smp_wmb(); sb->s_flags |= MS_RDONLY; } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); + } } #define ext4_error_ratelimit(sb) \ @@ -585,8 +589,12 @@ void __ext4_abort(struct super_block *sb, const char *function, jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); save_error_info(sb, function, line); } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs panic from previous error\n"); + } } void __ext4_msg(struct super_block *sb, diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index eb8a7546e8a4..81e622681c82 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2072,8 +2072,12 @@ static void __journal_abort_soft (journal_t *journal, int errno) __jbd2_journal_abort_hard(journal); - if (errno) + if (errno) { jbd2_journal_update_sb_errno(journal); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_REC_ERR; + write_unlock(&journal->j_state_lock); + } } /** diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b5ca2314cace..65407f6c9120 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1112,6 +1112,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ +#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */ /* * Function declarations for the journaling transaction and buffer From c93cf2d75779d73376e84e2547de1304113d3f1c Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sun, 18 Oct 2015 23:35:32 -0400 Subject: [PATCH 23/26] ext4: explicit mount options parsing cleanup Currently MOPT_EXPLICIT treated as EXPLICIT_DELALLOC which may be changed in future. Let's fix it now. Signed-off-by: Dmitry Monakhov Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 639613fe20aa..db4be39a8db7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1516,8 +1516,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, return -1; if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) return -1; - if (m->flags & MOPT_EXPLICIT) - set_opt2(sb, EXPLICIT_DELALLOC); + if (m->flags & MOPT_EXPLICIT) { + if (m->mount_opt & EXT4_MOUNT_DELALLOC) { + set_opt2(sb, EXPLICIT_DELALLOC); + } else + return -1; + } if (m->flags & MOPT_CLEAR_ERR) clear_opt(sb, ERRORS_MASK); if (token == Opt_noquota && sb_any_quota_loaded(sb)) { From 1e381f60dad913625956ed3a9c5714d9fc7e865f Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sun, 18 Oct 2015 23:50:26 -0400 Subject: [PATCH 24/26] ext4: do not allow journal_opts for fs w/o journal It is appeared that we can pass journal related mount options and such options be shown in /proc/mounts Example: #mkfs.ext4 -F /dev/vdb #tune2fs -O ^has_journal /dev/vdb #mount /dev/vdb /mnt/ -ocommit=20,journal_async_commit #cat /proc/mounts | grep /mnt /dev/vdb /mnt ext4 rw,relatime,journal_checksum,journal_async_commit,commit=20,data=ordered 0 0 But options:"journal_checksum,journal_async_commit,commit=20,data=ordered" has nothing with reality because there is no journal at all. This patch disallow following options for journalless configurations: - journal_checksum - journal_async_commit - commit=%ld - data={writeback,ordered,journal} Signed-off-by: Dmitry Monakhov Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger --- fs/ext4/ext4.h | 3 +++ fs/ext4/super.c | 31 +++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b1d04aa724be..750063f7a50c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1023,6 +1023,9 @@ struct ext4_inode_info { #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated file systems */ +#define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly + specified journal checksum */ + #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ ~EXT4_MOUNT_##opt #define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index db4be39a8db7..04d0f1b33409 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1384,10 +1384,10 @@ static const struct mount_opts { {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, - MOPT_EXT4_ONLY | MOPT_SET}, + MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | EXT4_MOUNT_JOURNAL_CHECKSUM), - MOPT_EXT4_ONLY | MOPT_SET}, + MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET}, {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, @@ -1519,6 +1519,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, if (m->flags & MOPT_EXPLICIT) { if (m->mount_opt & EXT4_MOUNT_DELALLOC) { set_opt2(sb, EXPLICIT_DELALLOC); + } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) { + set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM); } else return -1; } @@ -3677,6 +3679,31 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "suppressed and not mounted read-only"); goto failed_mount_wq; } else { + /* Nojournal mode, all journal mount options are illegal */ + if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "journal_checksum, fs mounted w/o journal"); + goto failed_mount_wq; + } + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "journal_async_commit, fs mounted w/o journal"); + goto failed_mount_wq; + } + if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "commit=%lu, fs mounted w/o journal", + sbi->s_commit_interval / HZ); + goto failed_mount_wq; + } + if (EXT4_MOUNT_DATA_FLAGS & + (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "data=, fs mounted w/o journal"); + goto failed_mount_wq; + } + sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM; + clear_opt(sb, JOURNAL_CHECKSUM); clear_opt(sb, DATA_FLAGS); sbi->s_journal = NULL; needs_recovery = 0; From 16175039e6c6527453a23b4949c2a68e82750895 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 19 Oct 2015 00:01:05 -0400 Subject: [PATCH 25/26] ext4: fix abs() usage in ext4_mb_check_group_pa The ext4_fsblk_t type is a long long, which should not be used with abs(), as is done in ext4_mb_check_group_pa(). This patch modifies ext4_mb_check_group_pa() to use abs64() instead. Signed-off-by: John Stultz Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 61eaf74dca37..b4b3c1f91814 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3333,8 +3333,8 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block, atomic_inc(&pa->pa_count); return pa; } - cur_distance = abs(goal_block - cpa->pa_pstart); - new_distance = abs(goal_block - pa->pa_pstart); + cur_distance = abs64(goal_block - cpa->pa_pstart); + new_distance = abs64(goal_block - pa->pa_pstart); if (cur_distance <= new_distance) return cpa; From be69e1c19f0efb091f29521715c380842fa4cd7e Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 29 Oct 2015 14:18:13 -0400 Subject: [PATCH 26/26] fs/ext4: remove unnecessary new_valid_dev check As new_valid_dev always returns 1, so !new_valid_dev check is not needed, remove it. Signed-off-by: Yaowei Bai Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 52a79d48752c..19ce34525a59 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2468,9 +2468,6 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode; int err, credits, retries = 0; - if (!new_valid_dev(rdev)) - return -EINVAL; - err = dquot_initialize(dir); if (err) return err;