From da9953b729c12ece6d35fd15d236457eee679228 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 Apr 2020 09:32:35 -0700 Subject: [PATCH 01/51] f2fs: introduce sysfs/data_io_flag to attach REQ_META/FUA This patch introduces a way to attach REQ_META/FUA explicitly to all the data writes given temperature. -> attach REQ_FUA to Hot Data writes -> attach REQ_FUA to Hot|Warm Data writes -> attach REQ_FUA to Hot|Warm|Cold Data writes -> attach REQ_FUA to Hot|Warm|Cold Data writes as well as REQ_META to Hot Data writes Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 9 +++++++++ fs/f2fs/data.c | 23 +++++++++++++++++++++++ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/sysfs.c | 3 ++- 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index bd8a0d19abe6..c8620ea7022a 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -323,3 +323,12 @@ What: /sys/fs/f2fs//mounted_time_sec Date: February 2020 Contact: "Jaegeuk Kim" Description: Show the mounted time in secs of this partition. + +What: /sys/fs/f2fs//data_io_flag +Date: April 2020 +Contact: "Jaegeuk Kim" +Description: Give a way to attach REQ_META|FUA to data writes + given temperature-based bits. Now the bits indicate: + * REQ_META | REQ_FUA | + * 5 | 4 | 3 | 2 | 1 | 0 | + * Cold | Warm | Hot | Cold | Warm | Hot | diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cdf2f626bea7..accd28728642 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -513,6 +513,28 @@ void f2fs_submit_bio(struct f2fs_sb_info *sbi, __submit_bio(sbi, bio, type); } +static void __attach_data_io_flag(struct f2fs_io_info *fio) +{ + struct f2fs_sb_info *sbi = fio->sbi; + unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1; + unsigned int fua_flag = sbi->data_io_flag & temp_mask; + unsigned int meta_flag = (sbi->data_io_flag >> NR_TEMP_TYPE) & + temp_mask; + /* + * data io flag bits per temp: + * REQ_META | REQ_FUA | + * 5 | 4 | 3 | 2 | 1 | 0 | + * Cold | Warm | Hot | Cold | Warm | Hot | + */ + if (fio->type != DATA) + return; + + if ((1 << fio->temp) & meta_flag) + fio->op_flags |= REQ_META; + if ((1 << fio->temp) & fua_flag) + fio->op_flags |= REQ_FUA; +} + static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; @@ -520,6 +542,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) if (!io->bio) return; + __attach_data_io_flag(fio); bio_set_op_attrs(io->bio, fio->op, fio->op_flags); if (is_read_io(fio->op)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ba470d5687fe..c2788738aa0d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1507,6 +1507,9 @@ struct f2fs_sb_info { unsigned long long write_iostat[NR_IO_TYPE]; bool iostat_enable; + /* to attach REQ_META|REQ_FUA flags */ + unsigned int data_io_flag; + /* For sysfs suppport */ struct kobject s_kobj; struct completion s_kobj_unregister; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index e3bbbef9b4f0..aeebfb5024a2 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -372,7 +372,6 @@ out: return count; } - if (!strcmp(a->attr.name, "iostat_enable")) { sbi->iostat_enable = !!t; if (!sbi->iostat_enable) @@ -543,6 +542,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, 
data_io_flag); F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(free_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); @@ -622,6 +622,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_rate), ATTR_LIST(inject_type), #endif + ATTR_LIST(data_io_flag), ATTR_LIST(dirty_segments), ATTR_LIST(free_segments), ATTR_LIST(unusable), From 2bc4bea33848e4c5f441fcec4a94497082c1cc9f Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 30 Mar 2020 03:30:59 +0000 Subject: [PATCH 02/51] f2fs: add tracepoint for f2fs iostat Added a tracepoint to see iostat of f2fs. Default period of that is 3 second. This tracepoint can be used to be monitoring I/O statistics periodically. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 +++ fs/f2fs/f2fs.h | 16 +++++++- fs/f2fs/super.c | 1 + fs/f2fs/sysfs.c | 39 +++++++++++++++++++ include/trace/events/f2fs.h | 52 +++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index c8620ea7022a..427f5b45c67f 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -332,3 +332,9 @@ Description: Give a way to attach REQ_META|FUA to data writes * REQ_META | REQ_FUA | * 5 | 4 | 3 | 2 | 1 | 0 | * Cold | Warm | Hot | Cold | Warm | Hot | + +What: /sys/fs/f2fs//iostat_period_ms +Date: April 2020 +Contact: "Daeho Jeong" +Description: Give a way to change iostat_period time. 3secs by default. + The new iostat trace gives stats gap given the period. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c2788738aa0d..6cedbfb2067c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1505,7 +1505,10 @@ struct f2fs_sb_info { /* For app/fs IO statistics */ spinlock_t iostat_lock; unsigned long long write_iostat[NR_IO_TYPE]; + unsigned long long prev_write_iostat[NR_IO_TYPE]; bool iostat_enable; + unsigned long iostat_next_period; + unsigned int iostat_period_ms; /* to attach REQ_META|REQ_FUA flags */ unsigned int data_io_flag; @@ -2999,16 +3002,25 @@ static inline int get_inline_xattr_addrs(struct inode *inode) sizeof((f2fs_inode)->field)) \ <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \ +#define DEFAULT_IOSTAT_PERIOD_MS 3000 +#define MIN_IOSTAT_PERIOD_MS 100 +/* maximum period of iostat tracing is 1 day */ +#define MAX_IOSTAT_PERIOD_MS 8640000 + static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) { int i; spin_lock(&sbi->iostat_lock); - for (i = 0; i < NR_IO_TYPE; i++) + for (i = 0; i < NR_IO_TYPE; i++) { sbi->write_iostat[i] = 0; + sbi->prev_write_iostat[i] = 0; + } spin_unlock(&sbi->iostat_lock); } +extern void f2fs_record_iostat(struct f2fs_sb_info *sbi); + static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, enum iostat_type type, unsigned long long io_bytes) { @@ -3022,6 +3034,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, sbi->write_iostat[APP_WRITE_IO] - sbi->write_iostat[APP_DIRECT_IO]; spin_unlock(&sbi->iostat_lock); + + f2fs_record_iostat(sbi); } #define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f2dfc21c6abb..438296e17183 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3424,6 +3424,7 @@ try_onemore: /* init iostat info */ spin_lock_init(&sbi->iostat_lock); sbi->iostat_enable = false; + sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS; for (i = 0; i < NR_PAGE_TYPE; i++) { int n = (i == META) ? 
1: NR_TEMP_TYPE; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index aeebfb5024a2..d05cb68c2637 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -15,6 +15,7 @@ #include "f2fs.h" #include "segment.h" #include "gc.h" +#include static struct proc_dir_entry *f2fs_proc_root; @@ -379,6 +380,15 @@ out: return count; } + if (!strcmp(a->attr.name, "iostat_period_ms")) { + if (t < MIN_IOSTAT_PERIOD_MS || t > MAX_IOSTAT_PERIOD_MS) + return -EINVAL; + spin_lock(&sbi->iostat_lock); + sbi->iostat_period_ms = (unsigned int)t; + spin_unlock(&sbi->iostat_lock); + return count; + } + *ui = (unsigned int)t; return count; @@ -535,6 +545,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); @@ -615,6 +626,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_idle_interval), ATTR_LIST(umount_discard_timeout), ATTR_LIST(iostat_enable), + ATTR_LIST(iostat_period_ms), ATTR_LIST(readdir_ra), ATTR_LIST(gc_pin_file_thresh), ATTR_LIST(extension_list), @@ -751,6 +763,33 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, return 0; } +void f2fs_record_iostat(struct f2fs_sb_info *sbi) +{ + unsigned long long iostat_diff[NR_IO_TYPE]; + int i; + + if (time_is_after_jiffies(sbi->iostat_next_period)) + return; + + /* Need double check under the lock */ + spin_lock(&sbi->iostat_lock); + if (time_is_after_jiffies(sbi->iostat_next_period)) { + spin_unlock(&sbi->iostat_lock); + return; + } + sbi->iostat_next_period = jiffies + + msecs_to_jiffies(sbi->iostat_period_ms); + + for (i = 0; i < NR_IO_TYPE; i++) { + iostat_diff[i] = sbi->write_iostat[i] - + sbi->prev_write_iostat[i]; + sbi->prev_write_iostat[i] = sbi->write_iostat[i]; + } + spin_unlock(&sbi->iostat_lock); + + trace_f2fs_iostat(sbi, iostat_diff); +} + static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset) { diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index d97adfc327f0..e78c8696e2ad 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1812,6 +1812,58 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end, TP_ARGS(inode, cluster_idx, compressed_size, ret) ); +TRACE_EVENT(f2fs_iostat, + + TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat), + + TP_ARGS(sbi, iostat), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long long, app_dio) + __field(unsigned long long, app_bio) + __field(unsigned long long, app_wio) + __field(unsigned long long, app_mio) + __field(unsigned long long, fs_dio) + __field(unsigned long long, fs_nio) + __field(unsigned long long, fs_mio) + __field(unsigned long long, fs_gc_dio) + __field(unsigned long long, fs_gc_nio) + __field(unsigned long long, fs_cp_dio) + __field(unsigned long long, fs_cp_nio) + __field(unsigned long long, fs_cp_mio) + __field(unsigned long long, fs_discard) + ), + + TP_fast_assign( + __entry->dev = sbi->sb->s_dev; + __entry->app_dio = iostat[APP_DIRECT_IO]; + __entry->app_bio = iostat[APP_BUFFERED_IO]; + __entry->app_wio = iostat[APP_WRITE_IO]; + __entry->app_mio = iostat[APP_MAPPED_IO]; + __entry->fs_dio = 
iostat[FS_DATA_IO]; + __entry->fs_nio = iostat[FS_NODE_IO]; + __entry->fs_mio = iostat[FS_META_IO]; + __entry->fs_gc_dio = iostat[FS_GC_DATA_IO]; + __entry->fs_gc_nio = iostat[FS_GC_NODE_IO]; + __entry->fs_cp_dio = iostat[FS_CP_DATA_IO]; + __entry->fs_cp_nio = iostat[FS_CP_NODE_IO]; + __entry->fs_cp_mio = iostat[FS_CP_META_IO]; + __entry->fs_discard = iostat[FS_DISCARD]; + ), + + TP_printk("dev = (%d,%d), " + "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu], " + "fs [data=%llu, node=%llu, meta=%llu, discard=%llu], " + "gc [data=%llu, node=%llu], " + "cp [data=%llu, node=%llu, meta=%llu]", + show_dev(__entry->dev), __entry->app_wio, __entry->app_dio, + __entry->app_bio, __entry->app_mio, __entry->fs_dio, + __entry->fs_nio, __entry->fs_mio, __entry->fs_discard, + __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio, + __entry->fs_cp_nio, __entry->fs_cp_mio) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ From 141af6ba5216d4d49de683582b600d5c9b51792c Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 15 Apr 2020 14:37:53 +0530 Subject: [PATCH 03/51] f2fs: fix long latency due to discard during umount F2FS already has a default timeout of 5 secs for discards that can be issued during umount, but it can take more than the 5 sec timeout if the underlying UFS device queue is already full and there are no more available free tags to be used. Fix this by submitting a small batch of discard requests so that it won't cause the device queue to be full at any time and thus doesn't incur its wait time in the umount context. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b7a9421472a7..90c75822fef6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1101,7 +1101,6 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { - dpolicy->max_requests = UINT_MAX; dpolicy->io_aware = false; /* we need to issue all to keep CP_TRIMMED_FLAG */ dpolicy->granularity = 1; @@ -1463,6 +1462,8 @@ next: return issued; } +static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy); static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) @@ -1471,12 +1472,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int i, issued = 0; + int i, issued; bool io_interrupted = false; if (dpolicy->timeout) f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT); +retry: + issued = 0; for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { if (dpolicy->timeout && f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT)) @@ -1523,6 +1526,11 @@ next: break; } + if (dpolicy->type == DPOLICY_UMOUNT && issued) { + __wait_all_discard_cmd(sbi, dpolicy); + goto retry; + } + if (!issued && io_interrupted) issued = -1; From 3fa6a8c5b55d063c6a759e0b354f9d7fc09ffbc0 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 15 Apr 2020 09:35:54 +0530 Subject: [PATCH 04/51] f2fs: report the discard cmd errors properly In case a discard_cmd is split into several bios, the dc->error must not be overwritten once an error is reported by a bio. Also, move it under dc->lock. 
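To make the failure mode concrete, here is an illustrative sequence (not code from the patch; bio1/bio2 are hypothetical names) for one discard_cmd whose discard was split across two bios:

	/* before: every completion overwrites dc->error */
	dc->error = blk_status_to_errno(bio1->bi_status);	/* records -EIO */
	dc->error = blk_status_to_errno(bio2->bi_status);	/* 0 clears it  */

	/* after: only the first error sticks, updated under dc->lock */
	spin_lock_irqsave(&dc->lock, flags);
	if (!dc->error)
		dc->error = blk_status_to_errno(bio->bi_status);
	...
	spin_unlock_irqrestore(&dc->lock, flags);

The conditional update under dc->lock mirrors the hunk below; the two-bio sequence above is only a sketch of how the unconditional assignment could end up reporting success for a discard that actually failed.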
Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 90c75822fef6..728ff6e316ac 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1029,9 +1029,9 @@ static void f2fs_submit_discard_endio(struct bio *bio) struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; unsigned long flags; - dc->error = blk_status_to_errno(bio->bi_status); - spin_lock_irqsave(&dc->lock, flags); + if (!dc->error) + dc->error = blk_status_to_errno(bio->bi_status); dc->bio_ref--; if (!dc->bio_ref && dc->state == D_SUBMIT) { dc->state = D_DONE; From ce4c638cdd52b302247434daed4c127f258d9860 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 10 Apr 2020 18:07:20 +0800 Subject: [PATCH 05/51] f2fs: fix to handle error path of f2fs_ra_meta_pages() In f2fs_ra_meta_pages(), if f2fs_submit_page_bio() failed, we need to unlock page, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 852890b72d6a..6be357c8e002 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -220,6 +220,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .is_por = (type == META_POR), }; struct blk_plug plug; + int err; if (unlikely(type == META_POR)) fio.op_flags &= ~REQ_META; @@ -263,8 +264,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, } fio.page = page; - f2fs_submit_page_bio(&fio); - f2fs_put_page(page, 0); + err = f2fs_submit_page_bio(&fio); + f2fs_put_page(page, err ? 1 : 0); } out: blk_finish_plug(&plug); From df423399757531c20d495bf6b8b83a8dcca3565c Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Thu, 16 Apr 2020 11:47:41 +0530 Subject: [PATCH 06/51] f2fs: Fix the accounting of dcc->undiscard_blks When a discard_cmd needs to be split due to dpolicy->max_requests, then for the remaining length it will be either merged into another cmd or a new discard_cmd will be created. In this case, there is double accounting of dcc->undiscard_blks for the remaining len, due to which it shows incorrect value in stats. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 728ff6e316ac..1c48ec866b8c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1214,8 +1214,10 @@ submit: len = total_len; } - if (!err && len) + if (!err && len) { + dcc->undiscard_blks -= len; __update_discard_tree_range(sbi, bdev, lstart, start, len); + } return err; } From 8b83ac81f4283ae3bd05c9a7e15dca721014dd03 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 16 Apr 2020 18:16:56 +0800 Subject: [PATCH 07/51] f2fs: support read iostat Adds to support accounting read IOs from userspace/kernel. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 ++++ fs/f2fs/data.c | 6 +++++ fs/f2fs/f2fs.h | 37 +++++++++++++++++++------- fs/f2fs/file.c | 12 ++++++++- fs/f2fs/gc.c | 6 +++++ fs/f2fs/node.c | 8 +++++- fs/f2fs/sysfs.c | 52 +++++++++++++++++++++++++------------ include/trace/events/f2fs.h | 23 ++++++++++++++-- 8 files changed, 118 insertions(+), 31 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6be357c8e002..5ba649e17c72 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -86,6 +86,8 @@ repeat: return ERR_PTR(err); } + f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE); + lock_page(page); if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); @@ -266,6 +268,9 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, fio.page = page; err = f2fs_submit_page_bio(&fio); f2fs_put_page(page, err ? 1 : 0); + + if (!err) + f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE); } out: blk_finish_plug(&plug); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index accd28728642..199877cb57fe 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1033,6 +1033,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, } ClearPageError(page); inc_page_count(sbi, F2FS_RD_DATA); + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); __submit_bio(sbi, bio, DATA); return 0; } @@ -2038,6 +2039,7 @@ submit_and_realloc: goto submit_and_realloc; inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); + f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE); ClearPageError(page); *last_block_in_bio = block_nr; goto out; @@ -2173,6 +2175,7 @@ submit_and_realloc: goto submit_and_realloc; inc_page_count(sbi, F2FS_RD_DATA); + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); ClearPageError(page); *last_block_in_bio = blkaddr; } @@ -3526,6 +3529,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } else if (err < 0) { f2fs_write_failed(mapping, offset + count); } + } else { + if (err > 0) + f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err); } out: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6cedbfb2067c..3b9603266a2a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1088,8 +1088,9 @@ enum cp_reason_type { }; enum iostat_type { - APP_DIRECT_IO, /* app direct IOs */ - APP_BUFFERED_IO, /* app buffered IOs */ + /* WRITE IO */ + APP_DIRECT_IO, /* app direct write IOs */ + APP_BUFFERED_IO, /* app buffered write IOs */ APP_WRITE_IO, /* app write IOs */ APP_MAPPED_IO, /* app mapped IOs */ FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */ @@ -1100,6 +1101,17 @@ enum iostat_type { FS_CP_DATA_IO, /* data IOs from checkpoint */ FS_CP_NODE_IO, /* node IOs from checkpoint */ FS_CP_META_IO, /* meta IOs from checkpoint */ + + /* READ IO */ + APP_DIRECT_READ_IO, /* app direct read IOs */ + APP_BUFFERED_READ_IO, /* app buffered read IOs */ + APP_READ_IO, /* app read IOs */ + APP_MAPPED_READ_IO, /* app mapped read IOs */ + FS_DATA_READ_IO, /* data read IOs */ + FS_NODE_READ_IO, /* node read IOs */ + FS_META_READ_IO, /* meta read IOs */ + + /* other */ FS_DISCARD, /* discard */ NR_IO_TYPE, }; @@ -1504,8 +1516,8 @@ struct f2fs_sb_info { /* For app/fs IO statistics */ spinlock_t iostat_lock; - unsigned long long write_iostat[NR_IO_TYPE]; - unsigned long long prev_write_iostat[NR_IO_TYPE]; + unsigned long long rw_iostat[NR_IO_TYPE]; + unsigned long long prev_rw_iostat[NR_IO_TYPE]; bool iostat_enable; unsigned long iostat_next_period; unsigned int iostat_period_ms; 
@@ -3013,8 +3025,8 @@ static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) spin_lock(&sbi->iostat_lock); for (i = 0; i < NR_IO_TYPE; i++) { - sbi->write_iostat[i] = 0; - sbi->prev_write_iostat[i] = 0; + sbi->rw_iostat[i] = 0; + sbi->prev_rw_iostat[i] = 0; } spin_unlock(&sbi->iostat_lock); } @@ -3027,12 +3039,17 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, if (!sbi->iostat_enable) return; spin_lock(&sbi->iostat_lock); - sbi->write_iostat[type] += io_bytes; + sbi->rw_iostat[type] += io_bytes; if (type == APP_WRITE_IO || type == APP_DIRECT_IO) - sbi->write_iostat[APP_BUFFERED_IO] = - sbi->write_iostat[APP_WRITE_IO] - - sbi->write_iostat[APP_DIRECT_IO]; + sbi->rw_iostat[APP_BUFFERED_IO] = + sbi->rw_iostat[APP_WRITE_IO] - + sbi->rw_iostat[APP_DIRECT_IO]; + + if (type == APP_READ_IO || type == APP_DIRECT_READ_IO) + sbi->rw_iostat[APP_BUFFERED_READ_IO] = + sbi->rw_iostat[APP_READ_IO] - + sbi->rw_iostat[APP_DIRECT_READ_IO]; spin_unlock(&sbi->iostat_lock); f2fs_record_iostat(sbi); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6ab8f621a3c5..a0a4413d6083 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -40,6 +40,10 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) ret = filemap_fault(vmf); up_read(&F2FS_I(inode)->i_mmap_sem); + if (!ret) + f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO, + F2FS_BLKSIZE); + trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)ret); return ret; @@ -3510,11 +3514,17 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + int ret; if (!f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; - return generic_file_read_iter(iocb, iter); + ret = generic_file_read_iter(iocb, iter); + + if (ret > 0) + f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret); + + return ret; } static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 26248c8936db..28a8c79c8bdc 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -737,6 +737,9 @@ got_it: goto put_encrypted_page; f2fs_put_page(fio.encrypted_page, 0); f2fs_put_page(page, 1); + + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + return 0; put_encrypted_page: f2fs_put_page(fio.encrypted_page, 1); @@ -840,6 +843,9 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_put_page(mpage, 1); goto up_out; } + + f2fs_update_iostat(fio.sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + lock_page(mpage); if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || !PageUptodate(mpage))) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ecbd6bd14a49..4da0d8713df5 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1300,7 +1300,13 @@ static int read_node_page(struct page *page, int op_flags) } fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr; - return f2fs_submit_page_bio(&fio); + + err = f2fs_submit_page_bio(&fio); + + if (!err) + f2fs_update_iostat(sbi, FS_NODE_READ_IO, F2FS_BLKSIZE); + + return err; } /* diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d05cb68c2637..eaf8088548f0 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -781,9 +781,9 @@ void f2fs_record_iostat(struct f2fs_sb_info *sbi) msecs_to_jiffies(sbi->iostat_period_ms); for (i = 0; i < NR_IO_TYPE; i++) { - iostat_diff[i] = sbi->write_iostat[i] - - sbi->prev_write_iostat[i]; - sbi->prev_write_iostat[i] = sbi->write_iostat[i]; + iostat_diff[i] = sbi->rw_iostat[i] - + sbi->prev_rw_iostat[i]; + sbi->prev_rw_iostat[i] = 
sbi->rw_iostat[i]; } spin_unlock(&sbi->iostat_lock); @@ -802,33 +802,51 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, seq_printf(seq, "time: %-16llu\n", now); - /* print app IOs */ + /* print app write IOs */ seq_printf(seq, "app buffered: %-16llu\n", - sbi->write_iostat[APP_BUFFERED_IO]); + sbi->rw_iostat[APP_BUFFERED_IO]); seq_printf(seq, "app direct: %-16llu\n", - sbi->write_iostat[APP_DIRECT_IO]); + sbi->rw_iostat[APP_DIRECT_IO]); seq_printf(seq, "app mapped: %-16llu\n", - sbi->write_iostat[APP_MAPPED_IO]); + sbi->rw_iostat[APP_MAPPED_IO]); - /* print fs IOs */ + /* print fs write IOs */ seq_printf(seq, "fs data: %-16llu\n", - sbi->write_iostat[FS_DATA_IO]); + sbi->rw_iostat[FS_DATA_IO]); seq_printf(seq, "fs node: %-16llu\n", - sbi->write_iostat[FS_NODE_IO]); + sbi->rw_iostat[FS_NODE_IO]); seq_printf(seq, "fs meta: %-16llu\n", - sbi->write_iostat[FS_META_IO]); + sbi->rw_iostat[FS_META_IO]); seq_printf(seq, "fs gc data: %-16llu\n", - sbi->write_iostat[FS_GC_DATA_IO]); + sbi->rw_iostat[FS_GC_DATA_IO]); seq_printf(seq, "fs gc node: %-16llu\n", - sbi->write_iostat[FS_GC_NODE_IO]); + sbi->rw_iostat[FS_GC_NODE_IO]); seq_printf(seq, "fs cp data: %-16llu\n", - sbi->write_iostat[FS_CP_DATA_IO]); + sbi->rw_iostat[FS_CP_DATA_IO]); seq_printf(seq, "fs cp node: %-16llu\n", - sbi->write_iostat[FS_CP_NODE_IO]); + sbi->rw_iostat[FS_CP_NODE_IO]); seq_printf(seq, "fs cp meta: %-16llu\n", - sbi->write_iostat[FS_CP_META_IO]); + sbi->rw_iostat[FS_CP_META_IO]); + + /* print app read IOs */ + seq_printf(seq, "app buffered: %-16llu\n", + sbi->rw_iostat[APP_BUFFERED_READ_IO]); + seq_printf(seq, "app direct: %-16llu\n", + sbi->rw_iostat[APP_DIRECT_READ_IO]); + seq_printf(seq, "app mapped: %-16llu\n", + sbi->rw_iostat[APP_MAPPED_READ_IO]); + + /* print fs read IOs */ + seq_printf(seq, "fs data: %-16llu\n", + sbi->rw_iostat[FS_DATA_READ_IO]); + seq_printf(seq, "fs node: %-16llu\n", + sbi->rw_iostat[FS_NODE_READ_IO]); + seq_printf(seq, "fs meta: %-16llu\n", + sbi->rw_iostat[FS_META_READ_IO]); + + /* print other IOs */ seq_printf(seq, "fs discard: %-16llu\n", - sbi->write_iostat[FS_DISCARD]); + sbi->rw_iostat[FS_DISCARD]); return 0; } diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index e78c8696e2ad..417a486f5c8a 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1832,6 +1832,13 @@ TRACE_EVENT(f2fs_iostat, __field(unsigned long long, fs_cp_dio) __field(unsigned long long, fs_cp_nio) __field(unsigned long long, fs_cp_mio) + __field(unsigned long long, app_drio) + __field(unsigned long long, app_brio) + __field(unsigned long long, app_rio) + __field(unsigned long long, app_mrio) + __field(unsigned long long, fs_drio) + __field(unsigned long long, fs_nrio) + __field(unsigned long long, fs_mrio) __field(unsigned long long, fs_discard) ), @@ -1849,6 +1856,13 @@ TRACE_EVENT(f2fs_iostat, __entry->fs_cp_dio = iostat[FS_CP_DATA_IO]; __entry->fs_cp_nio = iostat[FS_CP_NODE_IO]; __entry->fs_cp_mio = iostat[FS_CP_META_IO]; + __entry->app_drio = iostat[APP_DIRECT_READ_IO]; + __entry->app_brio = iostat[APP_BUFFERED_READ_IO]; + __entry->app_rio = iostat[APP_READ_IO]; + __entry->app_mrio = iostat[APP_MAPPED_READ_IO]; + __entry->fs_drio = iostat[FS_DATA_READ_IO]; + __entry->fs_nrio = iostat[FS_NODE_READ_IO]; + __entry->fs_mrio = iostat[FS_META_READ_IO]; __entry->fs_discard = iostat[FS_DISCARD]; ), @@ -1856,12 +1870,17 @@ TRACE_EVENT(f2fs_iostat, "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu], " "fs [data=%llu, node=%llu, meta=%llu, 
discard=%llu], " "gc [data=%llu, node=%llu], " - "cp [data=%llu, node=%llu, meta=%llu]", + "cp [data=%llu, node=%llu, meta=%llu], " + "app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], " + "fs [data=%llu, node=%llu, meta=%llu]", show_dev(__entry->dev), __entry->app_wio, __entry->app_dio, __entry->app_bio, __entry->app_mio, __entry->fs_dio, __entry->fs_nio, __entry->fs_mio, __entry->fs_discard, __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio, - __entry->fs_cp_nio, __entry->fs_cp_mio) + __entry->fs_cp_nio, __entry->fs_cp_mio, + __entry->app_rio, __entry->app_drio, __entry->app_brio, + __entry->app_mrio, __entry->fs_drio, __entry->fs_nrio, + __entry->fs_mrio) ); #endif /* _TRACE_F2FS_H */ From 435cbab95e3966cd8310addd9e9b758dce0e8b84 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Apr 2020 10:25:21 -0700 Subject: [PATCH 08/51] f2fs: fix quota_sync failure due to f2fs_lock_op f2fs_quota_sync() uses f2fs_lock_op() before flushing dirty pages, but f2fs_write_data_page() returns EAGAIN. Likewise dentry blocks, we can just bypass getting the lock, since quota blocks are also maintained by checkpoint. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 8 +++++--- fs/f2fs/data.c | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index df7b2d15eacd..26b071afe48a 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -985,7 +985,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, loff_t psize; int i, err; - if (!f2fs_trylock_op(sbi)) + if (!IS_NOQUOTA(inode) && !f2fs_trylock_op(sbi)) return -EAGAIN; set_new_dnode(&dn, cc->inode, NULL, NULL, 0); @@ -1092,7 +1092,8 @@ unlock_continue: set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); + if (!IS_NOQUOTA(inode)) + f2fs_unlock_op(sbi); spin_lock(&fi->i_size_lock); if (fi->last_disk_size < psize) @@ -1118,7 +1119,8 @@ out_put_cic: out_put_dnode: f2fs_put_dnode(&dn); out_unlock_op: - f2fs_unlock_op(sbi); + if (!IS_NOQUOTA(inode)) + f2fs_unlock_op(sbi); return -EAGAIN; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 199877cb57fe..a5505aba2a1f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2659,8 +2659,8 @@ write: f2fs_available_free_memory(sbi, BASE_CHECK)))) goto redirty_out; - /* Dentry blocks are controlled by checkpoint */ - if (S_ISDIR(inode->i_mode)) { + /* Dentry/quota blocks are controlled by checkpoint */ + if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) { fio.need_lock = LOCK_DONE; err = f2fs_do_write_data_page(&fio); goto done; From d29fbcdb051fbe8e6aee87dd5a7df723f299494d Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Sat, 25 Apr 2020 18:49:08 +0530 Subject: [PATCH 09/51] f2fs: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header files related to F2FS File System support. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used). Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46. 
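For reference, the two styles the script distinguishes look like this (the file names here are only examples):

	In a C header such as fs/f2fs/f2fs.h:
		/* SPDX-License-Identifier: GPL-2.0 */

	In a C source file such as fs/f2fs/super.c:
		// SPDX-License-Identifier: GPL-2.0

Only the comment style differs; the identifier text itself is unchanged, which is why each header touched below is a one-line substitution.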
Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.h | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.h | 2 +- fs/f2fs/node.h | 2 +- fs/f2fs/segment.h | 2 +- fs/f2fs/trace.h | 2 +- fs/f2fs/xattr.h | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index b96823c59b15..124868c13f80 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/acl.h * diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3b9603266a2a..d036a31a97e8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/f2fs.h * diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index bbac9d3787bd..db3c61046aa4 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/gc.h * diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index e05af5df5648..6a2011deea23 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/node.h * diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7a83bd530812..cba16cca5189 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/segment.h * diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index e8075fc5b228..789f6aa727fc 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * f2fs IO tracer * diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 938fcd20565d..6a192e6c7a9e 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/xattr.h * From 3c57f75182452cc954f5e12374601b72574f4709 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 1 May 2020 16:35:23 -0700 Subject: [PATCH 10/51] f2fs: use strcmp() in parse_options() Remove the pointless string length checks. Just use strcmp(). 
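The length guards existed because strncmp() with a fixed length is a prefix match: strncmp(name, "on", 2) alone would also accept any longer string beginning with "on". strcmp() compares through the terminating NUL, so a single call is enough. Schematically, using the background_gc "on" case from the hunk below:

	/* before */
	if (strlen(name) == 2 && !strncmp(name, "on", 2))
		F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;

	/* after */
	if (!strcmp(name, "on"))
		F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;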
Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 44 ++++++++++++++++---------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 438296e17183..d0a705e49332 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -427,11 +427,11 @@ static int parse_options(struct super_block *sb, char *options) if (!name) return -ENOMEM; - if (strlen(name) == 2 && !strncmp(name, "on", 2)) { + if (!strcmp(name, "on")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; - } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) { + } else if (!strcmp(name, "off")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; - } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) { + } else if (!strcmp(name, "sync")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; } else { kvfree(name); @@ -591,16 +591,14 @@ static int parse_options(struct super_block *sb, char *options) if (!name) return -ENOMEM; - if (strlen(name) == 8 && - !strncmp(name, "adaptive", 8)) { + if (!strcmp(name, "adaptive")) { if (f2fs_sb_has_blkzoned(sbi)) { f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature"); kvfree(name); return -EINVAL; } F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; - } else if (strlen(name) == 3 && - !strncmp(name, "lfs", 3)) { + } else if (!strcmp(name, "lfs")) { F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; } else { kvfree(name); @@ -725,14 +723,11 @@ static int parse_options(struct super_block *sb, char *options) name = match_strdup(&args[0]); if (!name) return -ENOMEM; - if (strlen(name) == 10 && - !strncmp(name, "user-based", 10)) { + if (!strcmp(name, "user-based")) { F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER; - } else if (strlen(name) == 3 && - !strncmp(name, "off", 3)) { + } else if (!strcmp(name, "off")) { F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; - } else if (strlen(name) == 8 && - !strncmp(name, "fs-based", 8)) { + } else if (!strcmp(name, "fs-based")) { F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS; } else { kvfree(name); @@ -745,11 +740,9 @@ static int parse_options(struct super_block *sb, char *options) if (!name) return -ENOMEM; - if (strlen(name) == 7 && - !strncmp(name, "default", 7)) { + if (!strcmp(name, "default")) { F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; - } else if (strlen(name) == 5 && - !strncmp(name, "reuse", 5)) { + } else if (!strcmp(name, "reuse")) { F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; } else { kvfree(name); @@ -761,14 +754,11 @@ static int parse_options(struct super_block *sb, char *options) name = match_strdup(&args[0]); if (!name) return -ENOMEM; - if (strlen(name) == 5 && - !strncmp(name, "posix", 5)) { + if (!strcmp(name, "posix")) { F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; - } else if (strlen(name) == 6 && - !strncmp(name, "strict", 6)) { + } else if (!strcmp(name, "strict")) { F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT; - } else if (strlen(name) == 9 && - !strncmp(name, "nobarrier", 9)) { + } else if (!strcmp(name, "nobarrier")) { F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_NOBARRIER; } else { @@ -823,15 +813,13 @@ static int parse_options(struct super_block *sb, char *options) name = match_strdup(&args[0]); if (!name) return -ENOMEM; - if (strlen(name) == 3 && !strcmp(name, "lzo")) { + if (!strcmp(name, "lzo")) { F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; - } else if (strlen(name) == 3 && - !strcmp(name, "lz4")) { + } else if (!strcmp(name, "lz4")) { F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; - } else if 
(strlen(name) == 4 && - !strcmp(name, "zstd")) { + } else if (!strcmp(name, "zstd")) { F2FS_OPTION(sbi).compress_algorithm = COMPRESS_ZSTD; } else { From b5f4684b5f5fe6ea3c0d57acf5acdf8e8fbfe503 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 29 Feb 2020 18:49:06 +0800 Subject: [PATCH 11/51] f2fs: remove redundant compress inode check due to f2fs_post_read_required() has did that. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d036a31a97e8..a3809b092798 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4047,8 +4047,6 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, return true; if (f2fs_is_multi_device(sbi)) return true; - if (f2fs_compressed_file(inode)) - return true; /* * for blkzoned device, fallback direct IO to buffered IO, so * all IOs can be serialized by log-structured write. From 3265d3db1f16395cfc6b8ea9b31b4001d98d05ef Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 18 Mar 2020 16:22:59 +0800 Subject: [PATCH 12/51] f2fs: support partial truncation on compressed inode Supports to truncate compressed/normal cluster partially on compressed inode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 19 +++++++++++++----- 3 files changed, 65 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 26b071afe48a..d6283e351c95 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -954,6 +954,55 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, return first_index; } +int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock) +{ + void *fsdata = NULL; + struct page *pagep; + int log_cluster_size = F2FS_I(inode)->i_log_cluster_size; + pgoff_t start_idx = from >> (PAGE_SHIFT + log_cluster_size) << + log_cluster_size; + int err; + + err = f2fs_is_compressed_cluster(inode, start_idx); + if (err < 0) + return err; + + /* truncate normal cluster */ + if (!err) + return f2fs_do_truncate_blocks(inode, from, lock); + + /* truncate compressed cluster */ + err = f2fs_prepare_compress_overwrite(inode, &pagep, + start_idx, &fsdata); + + /* should not be a normal cluster */ + f2fs_bug_on(F2FS_I_SB(inode), err == 0); + + if (err <= 0) + return err; + + if (err > 0) { + struct page **rpages = fsdata; + int cluster_size = F2FS_I(inode)->i_cluster_size; + int i; + + for (i = cluster_size - 1; i >= 0; i--) { + loff_t start = rpages[i]->index << PAGE_SHIFT; + + if (from <= start) { + zero_user_segment(rpages[i], 0, PAGE_SIZE); + } else { + zero_user_segment(rpages[i], from - start, + PAGE_SIZE); + break; + } + } + + f2fs_compress_write_end(inode, fsdata, start_idx, true); + } + return 0; +} + static int f2fs_write_compressed_pages(struct compress_ctx *cc, int *submitted, struct writeback_control *wbc, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a3809b092798..cc7a7a0c02d6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3105,6 +3105,7 @@ static inline void f2fs_clear_page_private(struct page *page) */ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void f2fs_truncate_data_blocks(struct dnode_of_data *dn); +int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate(struct inode *inode); int f2fs_getattr(const struct path *path, struct kstat *stat, @@ -3825,6 
+3826,7 @@ int f2fs_prepare_compress_overwrite(struct inode *inode, struct page **pagep, pgoff_t index, void **fsdata); bool f2fs_compress_write_end(struct inode *inode, void *fsdata, pgoff_t index, unsigned copied); +int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock); void f2fs_compress_write_end_io(struct bio *bio, struct page *page); bool f2fs_is_compress_backend_ready(struct inode *inode); void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a0a4413d6083..175c66190087 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -647,9 +647,6 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, return 0; } - if (f2fs_compressed_file(inode)) - return 0; - page = f2fs_get_lock_data_page(inode, index, true); if (IS_ERR(page)) return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); @@ -665,7 +662,7 @@ truncate_out: return 0; } -static int do_truncate_blocks(struct inode *inode, u64 from, bool lock) +int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -733,7 +730,9 @@ free_partial: int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) { u64 free_from = from; + int err; +#ifdef CONFIG_F2FS_FS_COMPRESSION /* * for compressed file, only support cluster size * aligned truncation. @@ -748,8 +747,18 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) free_from++; free_from <<= cluster_shift; } +#endif - return do_truncate_blocks(inode, free_from, lock); + err = f2fs_do_truncate_blocks(inode, free_from, lock); + if (err) + return err; + +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (from != free_from) + err = f2fs_truncate_partial_cluster(inode, from, lock); +#endif + + return err; } int f2fs_truncate(struct inode *inode) From bf38fbad12b365223f9c5d4057f741bb03372737 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 28 Mar 2020 17:40:40 +0800 Subject: [PATCH 13/51] f2fs: support fiemap on compressed inode Map normal/compressed cluster of compressed inode correctly, and give the right fiemap flag FIEMAP_EXTENT_ENCODED on mapped compressed extent. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a5505aba2a1f..b79d643470d6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1832,6 +1832,25 @@ static int f2fs_xattr_fiemap(struct inode *inode, return (err < 0 ? 
err : 0); } +static loff_t max_inode_blocks(struct inode *inode) +{ + loff_t result = ADDRS_PER_INODE(inode); + loff_t leaf_count = ADDRS_PER_BLOCK(inode); + + /* two direct node blocks */ + result += (leaf_count * 2); + + /* two indirect node blocks */ + leaf_count *= NIDS_PER_BLOCK; + result += (leaf_count * 2); + + /* one double indirect node block */ + leaf_count *= NIDS_PER_BLOCK; + result += leaf_count; + + return result; +} + int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { @@ -1841,6 +1860,8 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 logical = 0, phys = 0, size = 0; u32 flags = 0; int ret = 0; + bool compr_cluster = false; + unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { ret = f2fs_precache_extents(inode); @@ -1875,6 +1896,9 @@ next: memset(&map_bh, 0, sizeof(struct buffer_head)); map_bh.b_size = len; + if (compr_cluster) + map_bh.b_size = blk_to_logical(inode, cluster_size - 1); + ret = get_data_block(inode, start_blk, &map_bh, 0, F2FS_GET_BLOCK_FIEMAP, &next_pgofs); if (ret) @@ -1885,7 +1909,7 @@ next: start_blk = next_pgofs; if (blk_to_logical(inode, start_blk) < blk_to_logical(inode, - F2FS_I_SB(inode)->max_file_blocks)) + max_inode_blocks(inode))) goto prep_next; flags |= FIEMAP_EXTENT_LAST; @@ -1897,11 +1921,38 @@ next: ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); + if (ret) + goto out; + size = 0; } - if (start_blk > last_blk || ret) + if (start_blk > last_blk) goto out; + if (compr_cluster) { + compr_cluster = false; + + + logical = blk_to_logical(inode, start_blk - 1); + phys = blk_to_logical(inode, map_bh.b_blocknr); + size = blk_to_logical(inode, cluster_size); + + flags |= FIEMAP_EXTENT_ENCODED; + + start_blk += cluster_size - 1; + + if (start_blk > last_blk) + goto out; + + goto prep_next; + } + + if (map_bh.b_blocknr == COMPRESS_ADDR) { + compr_cluster = true; + start_blk++; + goto prep_next; + } + logical = blk_to_logical(inode, start_blk); phys = blk_to_logical(inode, map_bh.b_blocknr); size = map_bh.b_size; From c1c633878662341eb0c502b3cab6e8c5cc83f44c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 30 Mar 2020 17:13:29 +0800 Subject: [PATCH 14/51] f2fs: introduce f2fs_bmap_compress() to support bmap() on compressed inode: if queried block locates in non-compressed cluster, return its physical block address. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b79d643470d6..ff8a92df8d99 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3666,6 +3666,37 @@ static int f2fs_set_data_page_dirty(struct page *page) return 0; } + +static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block) +{ +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct dnode_of_data dn; + sector_t start_idx, blknr = 0; + int ret; + + start_idx = round_down(block, F2FS_I(inode)->i_cluster_size); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); + if (ret) + return 0; + + if (dn.data_blkaddr != COMPRESS_ADDR) { + dn.ofs_in_node += block - start_idx; + blknr = f2fs_data_blkaddr(&dn); + if (!__is_valid_data_blkaddr(blknr)) + blknr = 0; + } + + f2fs_put_dnode(&dn); + + return blknr; +#else + return -EOPNOTSUPP; +#endif +} + + static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; @@ -3677,6 +3708,9 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) filemap_write_and_wait(mapping); + if (f2fs_compressed_file(inode)) + return f2fs_bmap_compress(inode, block); + return generic_block_bmap(mapping, block, get_data_block_bmap); } From 5e6bbde95982300d66d78fb282d4ee39df78fc33 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 8 Apr 2020 19:56:05 +0800 Subject: [PATCH 15/51] f2fs: introduce mempool for {,de}compress intermediate page allocation If compression feature is on, in scenario of no enough free memory, page refault ratio is higher than before, the root cause is: - {,de}compression flow needs to allocate intermediate pages to store compressed data in cluster, so during their allocation, vm may reclaim mmaped pages. - if above reclaimed pages belong to compressed cluster, during its refault, it may cause more intermediate pages allocation, result in reclaiming more mmaped pages. So this patch introduces a mempool for intermediate page allocation, in order to avoid high refault ratio, by default, number of preallocated page in pool is 512, user can change the number by assigning 'num_compress_pages' parameter during module initialization. Ma Feng found warnings in the original patch and fixed like below. Fix the following sparse warning: fs/f2fs/compress.c:501:5: warning: symbol 'num_compress_pages' was not declared. Should it be static? fs/f2fs/compress.c:530:6: warning: symbol 'f2fs_compress_free_page' was not declared. Should it be static? 
Reported-by: Hulk Robot Signed-off-by: Chao Yu Signed-off-by: Ma Feng Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 64 ++++++++++++++++++++++++++++++---------------- fs/f2fs/f2fs.h | 4 +++ fs/f2fs/super.c | 6 +++++ 3 files changed, 52 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d6283e351c95..230ea7cd1510 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -65,15 +65,6 @@ static void f2fs_set_compressed_page(struct page *page, page->mapping = inode->i_mapping; } -static void f2fs_put_compressed_page(struct page *page) -{ - set_page_private(page, (unsigned long)NULL); - ClearPagePrivate(page); - page->mapping = NULL; - unlock_page(page); - put_page(page); -} - static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock) { int i; @@ -476,17 +467,47 @@ bool f2fs_is_compress_backend_ready(struct inode *inode) return f2fs_cops[F2FS_I(inode)->i_compress_algorithm]; } -static struct page *f2fs_grab_page(void) +static mempool_t *compress_page_pool = NULL; +static int num_compress_pages = 512; +module_param(num_compress_pages, uint, 0444); +MODULE_PARM_DESC(num_compress_pages, + "Number of intermediate compress pages to preallocate"); + +int f2fs_init_compress_mempool(void) +{ + compress_page_pool = mempool_create_page_pool(num_compress_pages, 0); + if (!compress_page_pool) + return -ENOMEM; + + return 0; +} + +void f2fs_destroy_compress_mempool(void) +{ + mempool_destroy(compress_page_pool); +} + +static struct page *f2fs_compress_alloc_page(void) { struct page *page; - page = alloc_page(GFP_NOFS); - if (!page) - return NULL; + page = mempool_alloc(compress_page_pool, GFP_NOFS); lock_page(page); + return page; } +static void f2fs_compress_free_page(struct page *page) +{ + if (!page) + return; + set_page_private(page, (unsigned long)NULL); + ClearPagePrivate(page); + page->mapping = NULL; + unlock_page(page); + mempool_free(page, compress_page_pool); +} + static int f2fs_compress_pages(struct compress_ctx *cc) { struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); @@ -516,7 +537,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) } for (i = 0; i < cc->nr_cpages; i++) { - cc->cpages[i] = f2fs_grab_page(); + cc->cpages[i] = f2fs_compress_alloc_page(); if (!cc->cpages[i]) { ret = -ENOMEM; goto out_free_cpages; @@ -561,7 +582,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) vunmap(cc->rbuf); for (i = nr_cpages; i < cc->nr_cpages; i++) { - f2fs_put_compressed_page(cc->cpages[i]); + f2fs_compress_free_page(cc->cpages[i]); cc->cpages[i] = NULL; } @@ -581,7 +602,7 @@ out_vunmap_rbuf: out_free_cpages: for (i = 0; i < cc->nr_cpages; i++) { if (cc->cpages[i]) - f2fs_put_compressed_page(cc->cpages[i]); + f2fs_compress_free_page(cc->cpages[i]); } kfree(cc->cpages); cc->cpages = NULL; @@ -1183,7 +1204,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) if (unlikely(bio->bi_status)) mapping_set_error(cic->inode->i_mapping, -EIO); - f2fs_put_compressed_page(page); + f2fs_compress_free_page(page); dec_page_count(sbi, F2FS_WB_DATA); @@ -1344,7 +1365,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) for (i = 0; i < dic->nr_cpages; i++) { struct page *page; - page = f2fs_grab_page(); + page = f2fs_compress_alloc_page(); if (!page) goto out_free; @@ -1364,7 +1385,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) continue; } - dic->tpages[i] = f2fs_grab_page(); + dic->tpages[i] = f2fs_compress_alloc_page(); if (!dic->tpages[i]) goto out_free; } @@ -1386,8 +1407,7 @@ 
void f2fs_free_dic(struct decompress_io_ctx *dic) continue; if (!dic->tpages[i]) continue; - unlock_page(dic->tpages[i]); - put_page(dic->tpages[i]); + f2fs_compress_free_page(dic->tpages[i]); } kfree(dic->tpages); } @@ -1396,7 +1416,7 @@ void f2fs_free_dic(struct decompress_io_ctx *dic) for (i = 0; i < dic->nr_cpages; i++) { if (!dic->cpages[i]) continue; - f2fs_put_compressed_page(dic->cpages[i]); + f2fs_compress_free_page(dic->cpages[i]); } kfree(dic->cpages); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cc7a7a0c02d6..d098b94ca22d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3829,6 +3829,8 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock); void f2fs_compress_write_end_io(struct bio *bio, struct page *page); bool f2fs_is_compress_backend_ready(struct inode *inode); +int f2fs_init_compress_mempool(void); +void f2fs_destroy_compress_mempool(void); void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity); bool f2fs_cluster_is_empty(struct compress_ctx *cc); bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); @@ -3862,6 +3864,8 @@ static inline struct page *f2fs_compress_control_page(struct page *page) WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); } +static inline int f2fs_init_compress_mempool(void) { return 0; } +static inline void f2fs_destroy_compress_mempool(void) { } #endif static inline void set_compress_context(struct inode *inode) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d0a705e49332..1b170fc48941 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3879,7 +3879,12 @@ static int __init init_f2fs_fs(void) err = f2fs_init_bioset(); if (err) goto free_bio_enrty_cache; + err = f2fs_init_compress_mempool(); + if (err) + goto free_bioset; return 0; +free_bioset: + f2fs_destroy_bioset(); free_bio_enrty_cache: f2fs_destroy_bio_entry_cache(); free_post_read: @@ -3907,6 +3912,7 @@ fail: static void __exit exit_f2fs_fs(void) { + f2fs_destroy_compress_mempool(); f2fs_destroy_bioset(); f2fs_destroy_bio_entry_cache(); f2fs_destroy_post_read_processing(); From 6d92b201035dfe77426f8814fd5259db385a18b3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 8 Apr 2020 19:56:32 +0800 Subject: [PATCH 16/51] f2fs: compress: support lzo-rle compress algorithm LZO-RLE extension (run length encoding) was introduced to improve performance of LZO algorithm in scenario of data contains many zeros, zram has changed to use this extended algorithm by default, this patch adds to support this algorithm extension, to enable this extension, it needs to enable F2FS_FS_LZO and F2FS_FS_LZORLE config, and specifies "compress_algorithm=lzo-rle" mountoption. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 2 +- fs/f2fs/Kconfig | 10 ++++++++++ fs/f2fs/compress.c | 30 ++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 6 ++++++ include/trace/events/f2fs.h | 3 ++- 6 files changed, 50 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 87d794bc75a4..96e3f89d7aba 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -244,7 +244,7 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enabl would be unusable can be viewed at /sys/fs/f2fs//unusable This space is reclaimed once checkpoint=enable. 
compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo", - "lz4" and "zstd" algorithm. + "lz4", "zstd" and "lzo-rle" algorithm. compress_log_size=%u Support configuring compress cluster size, the size will be 4KB * (1 << %u), 16KB is minimum size, also it's default size. diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index bb68d21e1f8c..d13c5c6a9787 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -127,3 +127,13 @@ config F2FS_FS_ZSTD default y help Support ZSTD compress algorithm, if unsure, say Y. + +config F2FS_FS_LZORLE + bool "LZO-RLE compression support" + depends on F2FS_FS_COMPRESSION + depends on F2FS_FS_LZO + select LZO_COMPRESS + select LZO_DECOMPRESS + default y + help + Support LZO-RLE compress algorithm, if unsure, say Y. diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 230ea7cd1510..c7c5a8f8a48c 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -442,6 +442,31 @@ static const struct f2fs_compress_ops f2fs_zstd_ops = { }; #endif +#ifdef CONFIG_F2FS_FS_LZO +#ifdef CONFIG_F2FS_FS_LZORLE +static int lzorle_compress_pages(struct compress_ctx *cc) +{ + int ret; + + ret = lzorle1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata, + &cc->clen, cc->private); + if (ret != LZO_E_OK) { + printk_ratelimited("%sF2FS-fs (%s): lzo-rle compress failed, ret:%d\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret); + return -EIO; + } + return 0; +} + +static const struct f2fs_compress_ops f2fs_lzorle_ops = { + .init_compress_ctx = lzo_init_compress_ctx, + .destroy_compress_ctx = lzo_destroy_compress_ctx, + .compress_pages = lzorle_compress_pages, + .decompress_pages = lzo_decompress_pages, +}; +#endif +#endif + static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { #ifdef CONFIG_F2FS_FS_LZO &f2fs_lzo_ops, @@ -458,6 +483,11 @@ static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { #else NULL, #endif +#if defined(CONFIG_F2FS_FS_LZO) && defined(CONFIG_F2FS_FS_LZORLE) + &f2fs_lzorle_ops, +#else + NULL, +#endif }; bool f2fs_is_compress_backend_ready(struct inode *inode) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d098b94ca22d..0dab21e764d9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1281,6 +1281,7 @@ enum compress_algorithm_type { COMPRESS_LZO, COMPRESS_LZ4, COMPRESS_ZSTD, + COMPRESS_LZORLE, COMPRESS_MAX, }; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1b170fc48941..582bbf40c559 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -822,6 +822,9 @@ static int parse_options(struct super_block *sb, char *options) } else if (!strcmp(name, "zstd")) { F2FS_OPTION(sbi).compress_algorithm = COMPRESS_ZSTD; + } else if (!strcmp(name, "lzo-rle")) { + F2FS_OPTION(sbi).compress_algorithm = + COMPRESS_LZORLE; } else { kfree(name); return -EINVAL; @@ -1415,6 +1418,9 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, case COMPRESS_ZSTD: algtype = "zstd"; break; + case COMPRESS_LZORLE: + algtype = "lzo-rle"; + break; } seq_printf(seq, ",compress_algorithm=%s", algtype); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 417a486f5c8a..757d3d6031e6 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -154,7 +154,8 @@ TRACE_DEFINE_ENUM(CP_PAUSE); __print_symbolic(type, \ { COMPRESS_LZO, "LZO" }, \ { COMPRESS_LZ4, "LZ4" }, \ - { COMPRESS_ZSTD, "ZSTD" }) + { COMPRESS_ZSTD, "ZSTD" }, \ + { COMPRESS_LZORLE, "LZO-RLE" }) struct f2fs_sb_info; struct f2fs_io_info; From ff5f85c8d62a487bde415ef4c9e2d0be718021df Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 
7 May 2020 00:59:02 -0700 Subject: [PATCH 17/51] f2fs: don't leak filename in f2fs_try_convert_inline_dir() We need to call fscrypt_free_filename() to free the memory allocated by fscrypt_setup_filename(). Fixes: b06af2aff28b ("f2fs: convert inline_dir early before starting rename") Cc: # v5.6+ Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4167e5408151..59a4b7ff11e1 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -559,12 +559,12 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) ipage = f2fs_get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); - goto out; + goto out_fname; } if (f2fs_has_enough_room(dir, ipage, &fname)) { f2fs_put_page(ipage, 1); - goto out; + goto out_fname; } inline_dentry = inline_data_addr(dir, ipage); @@ -572,6 +572,8 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) err = do_convert_inline_dir(dir, ipage, inline_dentry); if (!err) f2fs_put_page(ipage, 1); +out_fname: + fscrypt_free_filename(&fname); out: f2fs_unlock_op(sbi); return err; From f874fa1c7c7905c1744a2037a11516558ed00a81 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 7 May 2020 00:59:03 -0700 Subject: [PATCH 18/51] f2fs: split f2fs_d_compare() from f2fs_match_name() Sharing f2fs_ci_compare() between comparing cached dentries (f2fs_d_compare()) and comparing on-disk dentries (f2fs_match_name()) doesn't work as well as intended, as these actions fundamentally differ in several ways (e.g. whether the task may sleep, whether the directory is stable, whether the casefolded name was precomputed, whether the dentry will need to be decrypted once we allow casefold+encrypt, etc.) Just make f2fs_d_compare() implement what it needs directly, and rework f2fs_ci_compare() to be specialized for f2fs_match_name(). Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 66 +++++++++++++++++++++++++------------------------- fs/f2fs/f2fs.h | 5 ---- 2 files changed, 33 insertions(+), 38 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 44bfc464df78..44eb12a00cd0 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -107,36 +107,28 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, /* * Test whether a case-insensitive directory entry matches the filename * being searched for. - * - * Returns: 0 if the directory entry matches, more than 0 if it - * doesn't match or less than zero on error. */ -int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, - const struct qstr *entry, bool quick) +static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name, + const struct qstr *entry, bool quick) { - const struct f2fs_sb_info *sbi = F2FS_SB(parent->i_sb); + const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); const struct unicode_map *um = sbi->s_encoding; - int ret; + int res; if (quick) - ret = utf8_strncasecmp_folded(um, name, entry); + res = utf8_strncasecmp_folded(um, name, entry); else - ret = utf8_strncasecmp(um, name, entry); - - if (ret < 0) { - /* Handle invalid character sequence as either an error - * or as an opaque byte sequence. + res = utf8_strncasecmp(um, name, entry); + if (res < 0) { + /* + * In strict mode, ignore invalid names. In non-strict mode, + * fall back to treating them as opaque byte sequences. 
*/ - if (f2fs_has_strict_mode(sbi)) - return -EINVAL; - - if (name->len != entry->len) - return 1; - - return !!memcmp(name->name, entry->name, name->len); + if (f2fs_has_strict_mode(sbi) || name->len != entry->len) + return false; + return !memcmp(name->name, entry->name, name->len); } - - return ret; + return res == 0; } static void f2fs_fname_setup_ci_filename(struct inode *dir, @@ -188,10 +180,10 @@ static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, if (cf_str->name) { struct qstr cf = {.name = cf_str->name, .len = cf_str->len}; - return !f2fs_ci_compare(parent, &cf, &entry, true); + return f2fs_match_ci_name(parent, &cf, &entry, true); } - return !f2fs_ci_compare(parent, fname->usr_fname, &entry, - false); + return f2fs_match_ci_name(parent, fname->usr_fname, &entry, + false); } #endif if (fscrypt_match_name(fname, d->filename[bit_pos], @@ -1080,17 +1072,25 @@ const struct file_operations f2fs_dir_operations = { static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { - struct qstr qstr = {.name = str, .len = len }; const struct dentry *parent = READ_ONCE(dentry->d_parent); - const struct inode *inode = READ_ONCE(parent->d_inode); + const struct inode *dir = READ_ONCE(parent->d_inode); + const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct qstr entry = QSTR_INIT(str, len); + int res; - if (!inode || !IS_CASEFOLDED(inode)) { - if (len != name->len) - return -1; - return memcmp(str, name->name, len); - } + if (!dir || !IS_CASEFOLDED(dir)) + goto fallback; - return f2fs_ci_compare(inode, name, &qstr, false); + res = utf8_strncasecmp(sbi->s_encoding, name, &entry); + if (res >= 0) + return res; + + if (f2fs_has_strict_mode(sbi)) + return -EINVAL; +fallback: + if (len != name->len) + return 1; + return !!memcmp(str, name->name, len); } static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0dab21e764d9..216b0a809355 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3142,11 +3142,6 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); -extern int f2fs_ci_compare(const struct inode *parent, - const struct qstr *name, - const struct qstr *entry, - bool quick); - /* * dir.c */ From 43c780ba26244e4caf3f9986beb6c4ae5eb54f50 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 7 May 2020 00:59:04 -0700 Subject: [PATCH 19/51] f2fs: rework filename handling Rework f2fs's handling of filenames to use a new 'struct f2fs_filename'. Similar to 'struct ext4_filename', this stores the usr_fname, disk_name, dirhash, crypto_buf, and casefolded name. Some of these names can be NULL in some cases. 'struct f2fs_filename' differs from 'struct fscrypt_name' mainly in that the casefolded name is included. For user-initiated directory operations like lookup() and create(), initialize the f2fs_filename by translating the corresponding fscrypt_name, then computing the dirhash and casefolded name if needed. This makes the dirhash and casefolded name be cached for each syscall, so we don't have to recompute them repeatedly. (Previously, f2fs computed the dirhash once per directory level, and the casefolded name once per directory block.) This improves performance. This rework also makes it much easier to correctly handle all combinations of normal, encrypted, casefolded, and encrypted+casefolded directories. (The fourth isn't supported yet but is being worked on.) 
The only other cases where an f2fs_filename gets initialized are for two filesystem-internal operations: (1) when converting an inline directory to a regular one, we grab the needed disk_name and hash from an existing f2fs_dir_entry; and (2) when roll-forward recovering a new dentry, we grab the needed disk_name from f2fs_inode::i_name and compute the hash. Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 310 +++++++++++++++++++++++++-------------------- fs/f2fs/f2fs.h | 78 +++++++++--- fs/f2fs/hash.c | 76 +++++------ fs/f2fs/inline.c | 45 ++++--- fs/f2fs/namei.c | 6 +- fs/f2fs/recovery.c | 51 ++++++-- 6 files changed, 331 insertions(+), 235 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 44eb12a00cd0..29f70f2295cc 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -70,6 +70,111 @@ unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de) return DT_UNKNOWN; } +/* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname. */ +int f2fs_init_casefolded_name(const struct inode *dir, + struct f2fs_filename *fname) +{ +#ifdef CONFIG_UNICODE + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + + if (IS_CASEFOLDED(dir)) { + fname->cf_name.name = f2fs_kmalloc(sbi, F2FS_NAME_LEN, + GFP_NOFS); + if (!fname->cf_name.name) + return -ENOMEM; + fname->cf_name.len = utf8_casefold(sbi->s_encoding, + fname->usr_fname, + fname->cf_name.name, + F2FS_NAME_LEN); + if ((int)fname->cf_name.len <= 0) { + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; + if (f2fs_has_strict_mode(sbi)) + return -EINVAL; + /* fall back to treating name as opaque byte sequence */ + } + } +#endif + return 0; +} + +static int __f2fs_setup_filename(const struct inode *dir, + const struct fscrypt_name *crypt_name, + struct f2fs_filename *fname) +{ + int err; + + memset(fname, 0, sizeof(*fname)); + + fname->usr_fname = crypt_name->usr_fname; + fname->disk_name = crypt_name->disk_name; +#ifdef CONFIG_FS_ENCRYPTION + fname->crypto_buf = crypt_name->crypto_buf; +#endif + if (crypt_name->is_ciphertext_name) { + /* hash was decoded from the no-key name */ + fname->hash = cpu_to_le32(crypt_name->hash); + } else { + err = f2fs_init_casefolded_name(dir, fname); + if (err) { + f2fs_free_filename(fname); + return err; + } + f2fs_hash_filename(dir, fname); + } + return 0; +} + +/* + * Prepare to search for @iname in @dir. This is similar to + * fscrypt_setup_filename(), but this also handles computing the casefolded name + * and the f2fs dirhash if needed, then packing all the information about this + * filename up into a 'struct f2fs_filename'. + */ +int f2fs_setup_filename(struct inode *dir, const struct qstr *iname, + int lookup, struct f2fs_filename *fname) +{ + struct fscrypt_name crypt_name; + int err; + + err = fscrypt_setup_filename(dir, iname, lookup, &crypt_name); + if (err) + return err; + + return __f2fs_setup_filename(dir, &crypt_name, fname); +} + +/* + * Prepare to look up @dentry in @dir. This is similar to + * fscrypt_prepare_lookup(), but this also handles computing the casefolded name + * and the f2fs dirhash if needed, then packing all the information about this + * filename up into a 'struct f2fs_filename'. 
+ */ +int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, + struct f2fs_filename *fname) +{ + struct fscrypt_name crypt_name; + int err; + + err = fscrypt_prepare_lookup(dir, dentry, &crypt_name); + if (err) + return err; + + return __f2fs_setup_filename(dir, &crypt_name, fname); +} + +void f2fs_free_filename(struct f2fs_filename *fname) +{ +#ifdef CONFIG_FS_ENCRYPTION + kfree(fname->crypto_buf.name); + fname->crypto_buf.name = NULL; +#endif +#ifdef CONFIG_UNICODE + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +#endif +} + static unsigned long dir_block_index(unsigned int level, int dir_level, unsigned int idx) { @@ -84,8 +189,7 @@ static unsigned long dir_block_index(unsigned int level, static struct f2fs_dir_entry *find_in_block(struct inode *dir, struct page *dentry_page, - struct fscrypt_name *fname, - f2fs_hash_t namehash, + const struct f2fs_filename *fname, int *max_slots, struct page **res_page) { @@ -96,7 +200,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); make_dentry_ptr_block(dir, &d, dentry_blk); - de = f2fs_find_target_dentry(fname, namehash, max_slots, &d); + de = f2fs_find_target_dentry(&d, fname, max_slots); if (de) *res_page = dentry_page; @@ -109,102 +213,55 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, * being searched for. */ static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name, - const struct qstr *entry, bool quick) + const u8 *de_name, u32 de_name_len) { const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); const struct unicode_map *um = sbi->s_encoding; + struct qstr entry = QSTR_INIT(de_name, de_name_len); int res; - if (quick) - res = utf8_strncasecmp_folded(um, name, entry); - else - res = utf8_strncasecmp(um, name, entry); + res = utf8_strncasecmp_folded(um, name, &entry); if (res < 0) { /* * In strict mode, ignore invalid names. In non-strict mode, * fall back to treating them as opaque byte sequences. 
*/ - if (f2fs_has_strict_mode(sbi) || name->len != entry->len) + if (f2fs_has_strict_mode(sbi) || name->len != entry.len) return false; - return !memcmp(name->name, entry->name, name->len); + return !memcmp(name->name, entry.name, name->len); } return res == 0; } +#endif /* CONFIG_UNICODE */ -static void f2fs_fname_setup_ci_filename(struct inode *dir, - const struct qstr *iname, - struct fscrypt_str *cf_name) +static inline bool f2fs_match_name(const struct inode *dir, + const struct f2fs_filename *fname, + const u8 *de_name, u32 de_name_len) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - - if (!IS_CASEFOLDED(dir)) { - cf_name->name = NULL; - return; - } - - cf_name->name = f2fs_kmalloc(sbi, F2FS_NAME_LEN, GFP_NOFS); - if (!cf_name->name) - return; - - cf_name->len = utf8_casefold(sbi->s_encoding, - iname, cf_name->name, - F2FS_NAME_LEN); - if ((int)cf_name->len <= 0) { - kvfree(cf_name->name); - cf_name->name = NULL; - } -} -#endif - -static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, - struct f2fs_dir_entry *de, - struct fscrypt_name *fname, - struct fscrypt_str *cf_str, - unsigned long bit_pos, - f2fs_hash_t namehash) -{ -#ifdef CONFIG_UNICODE - struct inode *parent = d->inode; - struct f2fs_sb_info *sbi = F2FS_I_SB(parent); - struct qstr entry; -#endif - - if (de->hash_code != namehash) - return false; + struct fscrypt_name f; #ifdef CONFIG_UNICODE - entry.name = d->filename[bit_pos]; - entry.len = de->name_len; + if (fname->cf_name.name) { + struct qstr cf = FSTR_TO_QSTR(&fname->cf_name); - if (sbi->s_encoding && IS_CASEFOLDED(parent)) { - if (cf_str->name) { - struct qstr cf = {.name = cf_str->name, - .len = cf_str->len}; - return f2fs_match_ci_name(parent, &cf, &entry, true); - } - return f2fs_match_ci_name(parent, fname->usr_fname, &entry, - false); + return f2fs_match_ci_name(dir, &cf, de_name, de_name_len); } #endif - if (fscrypt_match_name(fname, d->filename[bit_pos], - le16_to_cpu(de->name_len))) - return true; - return false; + f.usr_fname = fname->usr_fname; + f.disk_name = fname->disk_name; +#ifdef CONFIG_FS_ENCRYPTION + f.crypto_buf = fname->crypto_buf; +#endif + return fscrypt_match_name(&f, de_name, de_name_len); } -struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, - f2fs_hash_t namehash, int *max_slots, - struct f2fs_dentry_ptr *d) +struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, + const struct f2fs_filename *fname, int *max_slots) { struct f2fs_dir_entry *de; - struct fscrypt_str cf_str = { .name = NULL, .len = 0 }; unsigned long bit_pos = 0; int max_len = 0; -#ifdef CONFIG_UNICODE - f2fs_fname_setup_ci_filename(d->inode, fname->usr_fname, &cf_str); -#endif - if (max_slots) *max_slots = 0; while (bit_pos < d->max) { @@ -221,7 +278,9 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, continue; } - if (f2fs_match_name(d, de, fname, &cf_str, bit_pos, namehash)) + if (de->hash_code == fname->hash && + f2fs_match_name(d->inode, fname, d->filename[bit_pos], + le16_to_cpu(de->name_len))) goto found; if (max_slots && max_len > *max_slots) @@ -235,33 +294,27 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, found: if (max_slots && max_len > *max_slots) *max_slots = max_len; - -#ifdef CONFIG_UNICODE - kvfree(cf_str.name); -#endif return de; } static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, - struct fscrypt_name *fname, + const struct f2fs_filename *fname, struct page **res_page) { - struct qstr name = 
FSTR_TO_QSTR(&fname->disk_name); - int s = GET_DENTRY_SLOTS(name.len); + int s = GET_DENTRY_SLOTS(fname->disk_name.len); unsigned int nbucket, nblock; unsigned int bidx, end_block; struct page *dentry_page; struct f2fs_dir_entry *de = NULL; bool room = false; int max_slots; - f2fs_hash_t namehash = f2fs_dentry_hash(dir, &name, fname); nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, - le32_to_cpu(namehash) % nbucket); + le32_to_cpu(fname->hash) % nbucket); end_block = bidx + nblock; for (; bidx < end_block; bidx++) { @@ -277,8 +330,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, } } - de = find_in_block(dir, dentry_page, fname, namehash, - &max_slots, res_page); + de = find_in_block(dir, dentry_page, fname, &max_slots, + res_page); if (de) break; @@ -287,8 +340,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, f2fs_put_page(dentry_page, 0); } - if (!de && room && F2FS_I(dir)->chash != namehash) { - F2FS_I(dir)->chash = namehash; + if (!de && room && F2FS_I(dir)->chash != fname->hash) { + F2FS_I(dir)->chash = fname->hash; F2FS_I(dir)->clevel = level; } @@ -296,7 +349,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, } struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, - struct fscrypt_name *fname, struct page **res_page) + const struct f2fs_filename *fname, + struct page **res_page) { unsigned long npages = dir_blocks(dir); struct f2fs_dir_entry *de = NULL; @@ -345,18 +399,10 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, const struct qstr *child, struct page **res_page) { struct f2fs_dir_entry *de = NULL; - struct fscrypt_name fname; + struct f2fs_filename fname; int err; -#ifdef CONFIG_UNICODE - if (f2fs_has_strict_mode(F2FS_I_SB(dir)) && IS_CASEFOLDED(dir) && - utf8_validate(F2FS_I_SB(dir)->s_encoding, child)) { - *res_page = ERR_PTR(-EINVAL); - return NULL; - } -#endif - - err = fscrypt_setup_filename(dir, child, 1, &fname); + err = f2fs_setup_filename(dir, child, 1, &fname); if (err) { if (err == -ENOENT) *res_page = NULL; @@ -367,7 +413,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, de = __f2fs_find_entry(dir, &fname, res_page); - fscrypt_free_filename(&fname); + f2fs_free_filename(&fname); return de; } @@ -408,7 +454,8 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_put_page(page, 1); } -static void init_dent_inode(const struct qstr *name, struct page *ipage) +static void init_dent_inode(const struct f2fs_filename *fname, + struct page *ipage) { struct f2fs_inode *ri; @@ -416,16 +463,16 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) /* copy name info. to this inode page */ ri = F2FS_INODE(ipage); - ri->i_namelen = cpu_to_le32(name->len); - memcpy(ri->i_name, name->name, name->len); + ri->i_namelen = cpu_to_le32(fname->disk_name.len); + memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len); set_page_dirty(ipage); } void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d) { - struct qstr dot = QSTR_INIT(".", 1); - struct qstr dotdot = QSTR_INIT("..", 2); + struct fscrypt_str dot = FSTR_INIT(".", 1); + struct fscrypt_str dotdot = FSTR_INIT("..", 2); /* update dirent of "." 
*/ f2fs_update_dentry(inode->i_ino, inode->i_mode, d, &dot, 0, 0); @@ -459,8 +506,7 @@ static int make_empty_dir(struct inode *inode, } struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, - const struct qstr *new_name, const struct qstr *orig_name, - struct page *dpage) + const struct f2fs_filename *fname, struct page *dpage) { struct page *page; int err; @@ -485,7 +531,8 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, if (err) goto put_error; - err = f2fs_init_security(inode, dir, orig_name, page); + err = f2fs_init_security(inode, dir, + fname ? fname->usr_fname : NULL, page); if (err) goto put_error; @@ -500,8 +547,8 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, return page; } - if (new_name) { - init_dent_inode(new_name, page); + if (fname) { + init_dent_inode(fname, page); if (IS_ENCRYPTED(dir)) file_set_enc_name(inode); } @@ -569,11 +616,11 @@ next: } bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, - struct fscrypt_name *fname) + const struct f2fs_filename *fname) { struct f2fs_dentry_ptr d; unsigned int bit_pos; - int slots = GET_DENTRY_SLOTS(fname_len(fname)); + int slots = GET_DENTRY_SLOTS(fname->disk_name.len); make_dentry_ptr_inline(dir, &d, inline_data_addr(dir, ipage)); @@ -583,8 +630,8 @@ bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, } void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, - const struct qstr *name, f2fs_hash_t name_hash, - unsigned int bit_pos) + const struct fscrypt_str *name, f2fs_hash_t name_hash, + unsigned int bit_pos) { struct f2fs_dir_entry *de; int slots = GET_DENTRY_SLOTS(name->len); @@ -604,15 +651,13 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, } } -int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, - struct inode *inode, nid_t ino, umode_t mode) +int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname, + struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; unsigned int level; unsigned int current_depth; unsigned long bidx, block; - f2fs_hash_t dentry_hash; unsigned int nbucket, nblock; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; @@ -621,11 +666,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, int slots, err = 0; level = 0; - slots = GET_DENTRY_SLOTS(new_name->len); - dentry_hash = f2fs_dentry_hash(dir, new_name, NULL); + slots = GET_DENTRY_SLOTS(fname->disk_name.len); current_depth = F2FS_I(dir)->i_current_depth; - if (F2FS_I(dir)->chash == dentry_hash) { + if (F2FS_I(dir)->chash == fname->hash) { level = F2FS_I(dir)->clevel; F2FS_I(dir)->chash = 0; } @@ -647,7 +691,7 @@ start: nblock = bucket_blocks(level); bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, - (le32_to_cpu(dentry_hash) % nbucket)); + (le32_to_cpu(fname->hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { dentry_page = f2fs_get_new_data_page(dir, NULL, block, true); @@ -671,8 +715,7 @@ add_dentry: if (inode) { down_write(&F2FS_I(inode)->i_sem); - page = f2fs_init_inode_metadata(inode, dir, new_name, - orig_name, NULL); + page = f2fs_init_inode_metadata(inode, dir, fname, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -680,7 +723,8 @@ add_dentry: } make_dentry_ptr_block(NULL, &d, dentry_blk); - f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos); + f2fs_update_dentry(ino, mode, 
&d, &fname->disk_name, fname->hash, + bit_pos); set_page_dirty(dentry_page); @@ -704,21 +748,15 @@ fail: return err; } -int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, - struct inode *inode, nid_t ino, umode_t mode) +int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname, + struct inode *inode, nid_t ino, umode_t mode) { - struct qstr new_name; int err = -EAGAIN; - new_name.name = fname_name(fname); - new_name.len = fname_len(fname); - if (f2fs_has_inline_dentry(dir)) - err = f2fs_add_inline_entry(dir, &new_name, fname->usr_fname, - inode, ino, mode); + err = f2fs_add_inline_entry(dir, fname, inode, ino, mode); if (err == -EAGAIN) - err = f2fs_add_regular_entry(dir, &new_name, fname->usr_fname, - inode, ino, mode); + err = f2fs_add_regular_entry(dir, fname, inode, ino, mode); f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); return err; @@ -731,12 +769,12 @@ int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode) { - struct fscrypt_name fname; + struct f2fs_filename fname; struct page *page = NULL; struct f2fs_dir_entry *de = NULL; int err; - err = fscrypt_setup_filename(dir, name, 0, &fname); + err = f2fs_setup_filename(dir, name, 0, &fname); if (err) return err; @@ -759,7 +797,7 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, } else { err = f2fs_add_dentry(dir, &fname, inode, ino, mode); } - fscrypt_free_filename(&fname); + f2fs_free_filename(&fname); return err; } @@ -769,7 +807,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) int err = 0; down_write(&F2FS_I(inode)->i_sem); - page = f2fs_init_inode_metadata(inode, dir, NULL, NULL, NULL); + page = f2fs_init_inode_metadata(inode, dir, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 216b0a809355..20f047a07c5a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -506,6 +506,42 @@ static inline int get_inline_xattr_addrs(struct inode *inode); * For INODE and NODE manager */ /* for directory operations */ + +struct f2fs_filename { + /* + * The filename the user specified. This is NULL for some + * filesystem-internal operations, e.g. converting an inline directory + * to a non-inline one, or roll-forward recovering an encrypted dentry. + */ + const struct qstr *usr_fname; + + /* + * The on-disk filename. For encrypted directories, this is encrypted. + * This may be NULL for lookups in an encrypted dir without the key. + */ + struct fscrypt_str disk_name; + + /* The dirhash of this filename */ + f2fs_hash_t hash; + +#ifdef CONFIG_FS_ENCRYPTION + /* + * For lookups in encrypted directories: either the buffer backing + * disk_name, or a buffer that holds the decoded no-key name. + */ + struct fscrypt_str crypto_buf; +#endif +#ifdef CONFIG_UNICODE + /* + * For casefolded directories: the casefolded name, but it's left NULL + * if the original name is not valid Unicode or if the filesystem is + * doing an internal operation where usr_fname is also NULL. In these + * cases we fall back to treating the name as an opaque byte sequence. 
+ */ + struct fscrypt_str cf_name; +#endif +}; + struct f2fs_dentry_ptr { struct inode *inode; void *bitmap; @@ -2921,12 +2957,12 @@ static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) return is_set_ckpt_flags(sbi, CP_ERROR_FLAG); } -static inline bool is_dot_dotdot(const struct qstr *str) +static inline bool is_dot_dotdot(const u8 *name, size_t len) { - if (str->len == 1 && str->name[0] == '.') + if (len == 1 && name[0] == '.') return true; - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + if (len == 2 && name[0] == '.' && name[1] == '.') return true; return false; @@ -3146,22 +3182,28 @@ struct dentry *f2fs_get_parent(struct dentry *child); * dir.c */ unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de); -struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, - f2fs_hash_t namehash, int *max_slots, - struct f2fs_dentry_ptr *d); +int f2fs_init_casefolded_name(const struct inode *dir, + struct f2fs_filename *fname); +int f2fs_setup_filename(struct inode *dir, const struct qstr *iname, + int lookup, struct f2fs_filename *fname); +int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, + struct f2fs_filename *fname); +void f2fs_free_filename(struct f2fs_filename *fname); +struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, + const struct f2fs_filename *fname, int *max_slots); int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int start_pos, struct fscrypt_str *fstr); void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d); struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, - const struct qstr *new_name, - const struct qstr *orig_name, struct page *dpage); + const struct f2fs_filename *fname, struct page *dpage); void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth); int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots); void f2fs_drop_nlink(struct inode *dir, struct inode *inode); struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, - struct fscrypt_name *fname, struct page **res_page); + const struct f2fs_filename *fname, + struct page **res_page); struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, const struct qstr *child, struct page **res_page); struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p); @@ -3170,14 +3212,13 @@ ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode); bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, - struct fscrypt_name *fname); + const struct f2fs_filename *fname); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, - const struct qstr *name, f2fs_hash_t name_hash, + const struct fscrypt_str *name, f2fs_hash_t name_hash, unsigned int bit_pos); -int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, +int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode); -int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, +int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode); int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode); @@ -3207,8 +3248,7 @@ int 
f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); /* * hash.c */ -f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, - const struct qstr *name_info, struct fscrypt_name *fname); +void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname); /* * node.c @@ -3719,11 +3759,11 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); int f2fs_write_inline_data(struct inode *inode, struct page *page); bool f2fs_recover_inline_data(struct inode *inode, struct page *npage); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, - struct fscrypt_name *fname, struct page **res_page); + const struct f2fs_filename *fname, + struct page **res_page); int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage); -int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, +int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode); void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 5bc4dcd8fc03..e5997919472d 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -68,22 +68,9 @@ static void str2hashbuf(const unsigned char *msg, size_t len, *buf++ = pad; } -static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, - struct fscrypt_name *fname) +static u32 TEA_hash_name(const u8 *p, size_t len) { - __u32 hash; - f2fs_hash_t f2fs_hash; - const unsigned char *p; __u32 in[8], buf[4]; - const unsigned char *name = name_info->name; - size_t len = name_info->len; - - /* encrypted bigname case */ - if (fname && !fname->disk_name.name) - return cpu_to_le32(fname->hash); - - if (is_dot_dotdot(name_info)) - return 0; /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; @@ -91,7 +78,6 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, buf[2] = 0x98badcfe; buf[3] = 0x10325476; - p = name; while (1) { str2hashbuf(p, len, in, 4); TEA_transform(buf, in); @@ -100,41 +86,43 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, break; len -= 16; } - hash = buf[0]; - f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); - return f2fs_hash; + return buf[0] & ~F2FS_HASH_COL_BIT; } -f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, - const struct qstr *name_info, struct fscrypt_name *fname) +/* + * Compute @fname->hash. For all directories, @fname->disk_name must be set. + * For casefolded directories, @fname->usr_fname must be set, and also + * @fname->cf_name if the filename is valid Unicode. 
+ */ +void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname) { -#ifdef CONFIG_UNICODE - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - const struct unicode_map *um = sbi->s_encoding; - int r, dlen; - unsigned char *buff; - struct qstr folded; + const u8 *name = fname->disk_name.name; + size_t len = fname->disk_name.len; - if (!name_info->len || !IS_CASEFOLDED(dir)) - goto opaque_seq; + WARN_ON_ONCE(!name); - buff = f2fs_kzalloc(sbi, sizeof(char) * PATH_MAX, GFP_KERNEL); - if (!buff) - return -ENOMEM; - - dlen = utf8_casefold(um, name_info, buff, PATH_MAX); - if (dlen < 0) { - kvfree(buff); - goto opaque_seq; + if (is_dot_dotdot(name, len)) { + fname->hash = 0; + return; } - folded.name = buff; - folded.len = dlen; - r = __f2fs_dentry_hash(&folded, fname); - kvfree(buff); - return r; - -opaque_seq: +#ifdef CONFIG_UNICODE + if (IS_CASEFOLDED(dir)) { + /* + * If the casefolded name is provided, hash it instead of the + * on-disk name. If the casefolded name is *not* provided, that + * should only be because the name wasn't valid Unicode, so fall + * back to treating the name as an opaque byte sequence. + */ + WARN_ON_ONCE(!fname->usr_fname->name); + if (fname->cf_name.name) { + name = fname->cf_name.name; + len = fname->cf_name.len; + } else { + name = fname->usr_fname->name; + len = fname->usr_fname->len; + } + } #endif - return __f2fs_dentry_hash(name_info, fname); + fname->hash = cpu_to_le32(TEA_hash_name(name, len)); } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 59a4b7ff11e1..751a012ff840 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -305,15 +305,14 @@ process_inline: } struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, - struct fscrypt_name *fname, struct page **res_page) + const struct f2fs_filename *fname, + struct page **res_page) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - struct qstr name = FSTR_TO_QSTR(&fname->disk_name); struct f2fs_dir_entry *de; struct f2fs_dentry_ptr d; struct page *ipage; void *inline_dentry; - f2fs_hash_t namehash; ipage = f2fs_get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) { @@ -321,12 +320,10 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, return NULL; } - namehash = f2fs_dentry_hash(dir, &name, fname); - inline_dentry = inline_data_addr(dir, ipage); make_dentry_ptr_inline(dir, &d, inline_dentry); - de = f2fs_find_target_dentry(fname, namehash, NULL, &d); + de = f2fs_find_target_dentry(&d, fname, NULL); unlock_page(ipage); if (de) *res_page = ipage; @@ -443,7 +440,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) while (bit_pos < d.max) { struct f2fs_dir_entry *de; - struct qstr new_name; + struct f2fs_filename fname; nid_t ino; umode_t fake_mode; @@ -459,14 +456,19 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) continue; } - new_name.name = d.filename[bit_pos]; - new_name.len = le16_to_cpu(de->name_len); + /* + * We only need the disk_name and hash to move the dentry. + * We don't need the original or casefolded filenames. 
+ */ + memset(&fname, 0, sizeof(fname)); + fname.disk_name.name = d.filename[bit_pos]; + fname.disk_name.len = le16_to_cpu(de->name_len); + fname.hash = de->hash_code; ino = le32_to_cpu(de->ino); fake_mode = f2fs_get_de_type(de) << S_SHIFT; - err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL, - ino, fake_mode); + err = f2fs_add_regular_entry(dir, &fname, NULL, ino, fake_mode); if (err) goto punch_dentry_pages; @@ -543,7 +545,7 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct page *ipage; - struct fscrypt_name fname; + struct f2fs_filename fname; void *inline_dentry = NULL; int err = 0; @@ -552,7 +554,7 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) f2fs_lock_op(sbi); - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname); + err = f2fs_setup_filename(dir, &dentry->d_name, 0, &fname); if (err) goto out; @@ -573,23 +575,21 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) if (!err) f2fs_put_page(ipage, 1); out_fname: - fscrypt_free_filename(&fname); + f2fs_free_filename(&fname); out: f2fs_unlock_op(sbi); return err; } -int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, - struct inode *inode, nid_t ino, umode_t mode) +int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, + struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct page *ipage; unsigned int bit_pos; - f2fs_hash_t name_hash; void *inline_dentry = NULL; struct f2fs_dentry_ptr d; - int slots = GET_DENTRY_SLOTS(new_name->len); + int slots = GET_DENTRY_SLOTS(fname->disk_name.len); struct page *page = NULL; int err = 0; @@ -611,8 +611,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, if (inode) { down_write(&F2FS_I(inode)->i_sem); - page = f2fs_init_inode_metadata(inode, dir, new_name, - orig_name, ipage); + page = f2fs_init_inode_metadata(inode, dir, fname, ipage); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -621,8 +620,8 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_wait_on_page_writeback(ipage, NODE, true, true); - name_hash = f2fs_dentry_hash(dir, new_name, NULL); - f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); + f2fs_update_dentry(ino, mode, &d, &fname->disk_name, fname->hash, + bit_pos); set_page_dirty(ipage); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index f54119da2217..58db1de7ca94 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -482,7 +482,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, nid_t ino = -1; int err = 0; unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); - struct fscrypt_name fname; + struct f2fs_filename fname; trace_f2fs_lookup_start(dir, dentry, flags); @@ -491,13 +491,13 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - err = fscrypt_prepare_lookup(dir, dentry, &fname); + err = f2fs_prepare_lookup(dir, dentry, &fname); if (err == -ENOENT) goto out_splice; if (err) goto out; de = __f2fs_find_entry(dir, &fname, &page); - fscrypt_free_filename(&fname); + f2fs_free_filename(&fname); if (!de) { if (IS_ERR(page)) { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index dd804c07eeb0..ae5310f02e7f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -107,13 +107,51 @@ static void del_fsync_inode(struct fsync_inode_entry *entry, int drop) 
kmem_cache_free(fsync_entry_slab, entry); } +static int init_recovered_filename(const struct inode *dir, + struct f2fs_inode *raw_inode, + struct f2fs_filename *fname, + struct qstr *usr_fname) +{ + int err; + + memset(fname, 0, sizeof(*fname)); + fname->disk_name.len = le32_to_cpu(raw_inode->i_namelen); + fname->disk_name.name = raw_inode->i_name; + + if (WARN_ON(fname->disk_name.len > F2FS_NAME_LEN)) + return -ENAMETOOLONG; + + if (!IS_ENCRYPTED(dir)) { + usr_fname->name = fname->disk_name.name; + usr_fname->len = fname->disk_name.len; + fname->usr_fname = usr_fname; + } + + /* Compute the hash of the filename */ + if (IS_CASEFOLDED(dir)) { + err = f2fs_init_casefolded_name(dir, fname); + if (err) + return err; + f2fs_hash_filename(dir, fname); +#ifdef CONFIG_UNICODE + /* Case-sensitive match is fine for recovery */ + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +#endif + } else { + f2fs_hash_filename(dir, fname); + } + return 0; +} + static int recover_dentry(struct inode *inode, struct page *ipage, struct list_head *dir_list) { struct f2fs_inode *raw_inode = F2FS_INODE(ipage); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; - struct fscrypt_name fname; + struct f2fs_filename fname; + struct qstr usr_fname; struct page *page; struct inode *dir, *einode; struct fsync_inode_entry *entry; @@ -132,16 +170,9 @@ static int recover_dentry(struct inode *inode, struct page *ipage, } dir = entry->inode; - - memset(&fname, 0, sizeof(struct fscrypt_name)); - fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen); - fname.disk_name.name = raw_inode->i_name; - - if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) { - WARN_ON(1); - err = -ENAMETOOLONG; + err = init_recovered_filename(dir, raw_inode, &fname, &usr_fname); + if (err) goto out; - } retry: de = __f2fs_find_entry(dir, &fname, &page); if (de && inode->i_ino == le32_to_cpu(de->ino)) From 84c9c2de0626567c0d964ee5fa1ae3310911ddf8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 5 May 2020 11:41:11 -0700 Subject: [PATCH 20/51] f2fs: correctly fix the parent inode number during fsync() fsync() may be called on a deleted file that's still open. So when fsync() tries to set the parent inode number when the inode has LOST_PINO and i_nlink == 1 (to avoid later checkpoints), it needs to make sure to get the parent directory via a non-deleted alias. Also remove the unnecessary igrab() and iput(), as the caller already holds a reference to the inode. Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 175c66190087..81bfc5b44fa1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -169,9 +169,11 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) { struct dentry *dentry; - inode = igrab(inode); - dentry = d_find_any_alias(inode); - iput(inode); + /* + * Make sure to get the non-deleted alias. The alias associated with + * the open file descriptor being fsync()'ed may be deleted already. + */ + dentry = d_find_alias(inode); if (!dentry) return 0; From 042be373adf719ab64c4a44ae809d110826becbf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 May 2020 17:50:20 +0800 Subject: [PATCH 21/51] f2fs: shrink spinlock coverage In f2fs_try_to_free_nids(), the .nid_list_lock spinlock critical region grows with the requested shrink count. To avoid spinning other CPUs for a long time, release the nid caches in small batches, with each batch handled under .nid_list_lock coverage.
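As a rough userspace sketch of this batching idea (not part of the kernel patch; a pthread mutex stands in for the .nid_list_lock spinlock, and names such as SHRINK_BATCH, KEEP_MIN and try_to_free are invented for the illustration), the pattern of dropping and re-taking the lock between small batches looks like this:

/*
 * Toy model of the batched-release pattern: free at most SHRINK_BATCH
 * entries per lock hold so other lock contenders are not starved.
 * The real code walks nm_i->free_nid_list under nm_i->nid_list_lock.
 */
#include <pthread.h>
#include <stdlib.h>

#define SHRINK_BATCH	8	/* mirrors SHRINK_NID_BATCH_SIZE */
#define KEEP_MIN	16	/* stand-in for MAX_FREE_NIDS */

struct node {
	struct node *next;
};

static struct node *free_list;
static int free_cnt;
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Free up to nr_shrink entries, releasing the lock between batches. */
static int try_to_free(int nr_shrink)
{
	int nr = nr_shrink;

	while (nr_shrink > 0 && free_cnt > KEEP_MIN) {
		int batch = SHRINK_BATCH;

		pthread_mutex_lock(&list_lock);
		while (nr_shrink > 0 && batch > 0 &&
		       free_cnt > KEEP_MIN && free_list) {
			struct node *n = free_list;

			free_list = n->next;
			free_cnt--;
			free(n);
			nr_shrink--;
			batch--;
		}
		pthread_mutex_unlock(&list_lock);
	}
	return nr - nr_shrink;		/* number actually freed */
}

int main(void)
{
	for (int i = 0; i < 100; i++) {
		struct node *n = malloc(sizeof(*n));

		n->next = free_list;
		free_list = n;
		free_cnt++;
	}
	return try_to_free(50) == 50 ? 0 : 1;
}

The kernel change below bounds each lock hold with SHRINK_NID_BATCH_SIZE for the same reason.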
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 25 +++++++++++++++---------- fs/f2fs/node.h | 3 +++ 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4da0d8713df5..1db8cabf727e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2488,7 +2488,6 @@ void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct free_nid *i, *next; int nr = nr_shrink; if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) @@ -2497,17 +2496,23 @@ int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) if (!mutex_trylock(&nm_i->build_lock)) return 0; - spin_lock(&nm_i->nid_list_lock); - list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { - if (nr_shrink <= 0 || - nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) - break; + while (nr_shrink && nm_i->nid_cnt[FREE_NID] > MAX_FREE_NIDS) { + struct free_nid *i, *next; + unsigned int batch = SHRINK_NID_BATCH_SIZE; - __remove_free_nid(sbi, i, FREE_NID); - kmem_cache_free(free_nid_slab, i); - nr_shrink--; + spin_lock(&nm_i->nid_list_lock); + list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { + if (!nr_shrink || !batch || + nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) + break; + __remove_free_nid(sbi, i, FREE_NID); + kmem_cache_free(free_nid_slab, i); + nr_shrink--; + batch--; + } + spin_unlock(&nm_i->nid_list_lock); } - spin_unlock(&nm_i->nid_list_lock); + mutex_unlock(&nm_i->build_lock); return nr - nr_shrink; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 6a2011deea23..69e5859e993c 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -15,6 +15,9 @@ #define FREE_NID_PAGES 8 #define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) +/* size of free nid batch when shrinking */ +#define SHRINK_NID_BATCH_SIZE 8 + #define DEF_RA_NID_PAGES 0 /* # of nid pages to be readaheaded */ /* maximum readahead size for node during getting data blocks */ From ef8d563f184e1112651f2cbde383d43e599334e8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 6 Mar 2020 15:36:09 +0800 Subject: [PATCH 22/51] f2fs: introduce F2FS_IOC_RELEASE_COMPRESS_BLOCKS There are still reserved blocks on a compressed inode; this patch introduces a new ioctl that releases those reserved blocks back to the filesystem, so that userspace can reuse the freed space. ---- Daeho fixed a bug as described below. If writing pages and releasing compressed blocks occur simultaneously, and releasing cblocks is executed more than once on a file, the total block count of the filesystem and the block count of the file can become incorrect and damaged. We have to execute the release of compressed blocks only once per file, without interference from the writepages path.
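As a hedged userspace sketch (not part of the patch) of how a tool might invoke the new ioctl: the request code is redeclared here to match the definition added to fs/f2fs/f2fs.h below, and the 0xf5 magic is assumed to match F2FS_IOCTL_MAGIC; if exported kernel headers provide the definition, those should be used instead.

/*
 * Illustrative only: release the reserved blocks of one compressed f2fs
 * file and print the count the kernel reports back via put_user().
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>
#include <linux/types.h>

#ifndef F2FS_IOC_RELEASE_COMPRESS_BLOCKS
#define F2FS_IOCTL_MAGIC			0xf5
#define F2FS_IOC_RELEASE_COMPRESS_BLOCKS	_IOR(F2FS_IOCTL_MAGIC, 18, __u64)
#endif

int main(int argc, char **argv)
{
	__u64 released = 0;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <compressed-file>\n", argv[0]);
		return 1;
	}

	/* Open read-only: the file must not be open for write elsewhere. */
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (ioctl(fd, F2FS_IOC_RELEASE_COMPRESS_BLOCKS, &released) < 0) {
		perror("F2FS_IOC_RELEASE_COMPRESS_BLOCKS");
		close(fd);
		return 1;
	}

	printf("released %llu blocks\n", (unsigned long long)released);
	close(fd);
	return 0;
}

The same calling pattern applies to the reserve ioctl introduced later in this series, which uses request number 19.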
--- Signed-off-by: Chao Yu Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 ++ fs/f2fs/file.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 174 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 20f047a07c5a..d916540f1281 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -428,6 +428,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) #define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) #define F2FS_IOC_GET_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 17, __u64) +#define F2FS_IOC_RELEASE_COMPRESS_BLOCKS \ + _IOR(F2FS_IOCTL_MAGIC, 18, __u64) #define F2FS_IOC_GET_VOLUME_NAME FS_IOC_GETFSLABEL #define F2FS_IOC_SET_VOLUME_NAME FS_IOC_SETFSLABEL @@ -4048,6 +4050,10 @@ static inline void f2fs_i_compr_blocks_update(struct inode *inode, { int diff = F2FS_I(inode)->i_cluster_size - blocks; + /* don't update i_compr_blocks if saved blocks were released */ + if (!add && !F2FS_I(inode)->i_compr_blocks) + return; + if (add) { F2FS_I(inode)->i_compr_blocks += diff; stat_add_compr_blocks(inode, diff); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 81bfc5b44fa1..189f8ce046f0 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -563,6 +563,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) bool compressed_cluster = false; int cluster_index = 0, valid_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; + bool released = !F2FS_I(dn->inode)->i_compr_blocks; if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) base = get_extra_isize(dn->inode); @@ -601,7 +602,9 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); f2fs_invalidate_blocks(sbi, blkaddr); - nr_free++; + + if (!released || blkaddr != COMPRESS_ADDR) + nr_free++; } if (compressed_cluster) @@ -3434,6 +3437,167 @@ static int f2fs_get_compress_blocks(struct file *filp, unsigned long arg) return put_user(blocks, (u64 __user *)arg); } +static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); + unsigned int released_blocks = 0; + int cluster_size = F2FS_I(dn->inode)->i_cluster_size; + block_t blkaddr; + int i; + + for (i = 0; i < count; i++) { + blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + i); + + if (!__is_valid_data_blkaddr(blkaddr)) + continue; + if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE))) + return -EFSCORRUPTED; + } + + while (count) { + int compr_blocks = 0; + + for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { + blkaddr = f2fs_data_blkaddr(dn); + + if (i == 0) { + if (blkaddr == COMPRESS_ADDR) + continue; + dn->ofs_in_node += cluster_size; + goto next; + } + + if (__is_valid_data_blkaddr(blkaddr)) + compr_blocks++; + + if (blkaddr != NEW_ADDR) + continue; + + dn->data_blkaddr = NULL_ADDR; + f2fs_set_data_blkaddr(dn); + } + + f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false); + dec_valid_block_count(sbi, dn->inode, + cluster_size - compr_blocks); + + released_blocks += cluster_size - compr_blocks; +next: + count -= cluster_size; + } + + return released_blocks; +} + +static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + pgoff_t page_idx = 0, last_idx; + unsigned int released_blocks = 0; + int ret; + 
int writecount; + + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + + if (!f2fs_compressed_file(inode)) + return -EINVAL; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + f2fs_balance_fs(F2FS_I_SB(inode), true); + + inode_lock(inode); + + writecount = atomic_read(&inode->i_writecount); + if ((filp->f_mode & FMODE_WRITE && writecount != 1) || writecount) { + ret = -EBUSY; + goto out; + } + + if (IS_IMMUTABLE(inode)) { + ret = -EINVAL; + goto out; + } + + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); + if (ret) + goto out; + + if (!F2FS_I(inode)->i_compr_blocks) + goto out; + + F2FS_I(inode)->i_flags |= F2FS_IMMUTABLE_FL; + f2fs_set_inode_flags(inode); + inode->i_ctime = current_time(inode); + f2fs_mark_inode_dirty_sync(inode, true); + + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + + last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + + while (page_idx < last_idx) { + struct dnode_of_data dn; + pgoff_t end_offset, count; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); + if (ret) { + if (ret == -ENOENT) { + page_idx = f2fs_get_next_page_offset(&dn, + page_idx); + ret = 0; + continue; + } + break; + } + + end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); + count = roundup(count, F2FS_I(inode)->i_cluster_size); + + ret = release_compress_blocks(&dn, count); + + f2fs_put_dnode(&dn); + + if (ret < 0) + break; + + page_idx += count; + released_blocks += ret; + } + + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + up_write(&F2FS_I(inode)->i_mmap_sem); +out: + inode_unlock(inode); + + mnt_drop_write_file(filp); + + if (ret >= 0) { + ret = put_user(released_blocks, (u64 __user *)arg); + } else if (released_blocks && F2FS_I(inode)->i_compr_blocks) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " + "iblocks=%llu, released=%u, compr_blocks=%llu, " + "run fsck to fix.", + __func__, inode->i_ino, inode->i_blocks, + released_blocks, + F2FS_I(inode)->i_compr_blocks); + } + + return ret; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -3516,6 +3680,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_set_volume_name(filp, arg); case F2FS_IOC_GET_COMPRESS_BLOCKS: return f2fs_get_compress_blocks(filp, arg); + case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: + return f2fs_release_compress_blocks(filp, arg); default: return -ENOTTY; } @@ -3683,6 +3849,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_VOLUME_NAME: case F2FS_IOC_SET_VOLUME_NAME: case F2FS_IOC_GET_COMPRESS_BLOCKS: + case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: break; default: return -ENOIOCTLCMD; From 1f5f11a3c41e2b23288b2769435a00f74e02496b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 May 2020 12:25:45 -0700 Subject: [PATCH 23/51] f2fs: remove blk_plugging in block_operations blk_plugging doesn't seem to give any benefit. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5ba649e17c72..d49f7a01d8a2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1166,11 +1166,8 @@ static int block_operations(struct f2fs_sb_info *sbi) .nr_to_write = LONG_MAX, .for_reclaim = 0, }; - struct blk_plug plug; int err = 0, cnt = 0; - blk_start_plug(&plug); - retry_flush_quotas: f2fs_lock_all(sbi); if (__need_flush_quota(sbi)) { @@ -1198,7 +1195,7 @@ retry_flush_dents: f2fs_unlock_all(sbi); err = f2fs_sync_dirty_inodes(sbi, DIR_INODE); if (err) - goto out; + return err; cond_resched(); goto retry_flush_quotas; } @@ -1214,7 +1211,7 @@ retry_flush_dents: f2fs_unlock_all(sbi); err = f2fs_sync_inode_meta(sbi); if (err) - goto out; + return err; cond_resched(); goto retry_flush_quotas; } @@ -1230,7 +1227,7 @@ retry_flush_nodes: if (err) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); - goto out; + return err; } cond_resched(); goto retry_flush_nodes; @@ -1242,8 +1239,6 @@ retry_flush_nodes: */ __prepare_cp_block(sbi); up_write(&sbi->node_change); -out: - blk_finish_plug(&plug); return err; } From f6644143c63f2eac88973f7fea087582579b0189 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 9 May 2020 15:01:04 +0800 Subject: [PATCH 24/51] f2fs: compress: let lz4 compressor handle output buffer budget properly Commonly, in order to handle lz4 worst compress case, caller should allocate buffer with size of LZ4_compressBound(inputsize) for target compressed data storing, however in this case, if caller didn't allocate enough space, lz4 compressor still can handle output buffer budget properly, and end up compressing when left space in output buffer is not enough. So we don't have to allocate buffer with size for worst case, then we can avoid 2 * 4KB size intermediate buffer allocation when log_cluster_size is 2, and avoid unnecessary compressing work of compressor if we can not save at least 4KB space. Suggested-by: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index c7c5a8f8a48c..dcedf008f359 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -227,7 +227,12 @@ static int lz4_init_compress_ctx(struct compress_ctx *cc) if (!cc->private) return -ENOMEM; - cc->clen = LZ4_compressBound(PAGE_SIZE << cc->log_cluster_size); + /* + * we do not change cc->clen to LZ4_compressBound(inputsize) to + * adapt worst compress case, because lz4 compressor can handle + * output budget properly. + */ + cc->clen = cc->rlen - PAGE_SIZE - COMPRESS_HEADER_SIZE; return 0; } @@ -243,11 +248,9 @@ static int lz4_compress_pages(struct compress_ctx *cc) len = LZ4_compress_default(cc->rbuf, cc->cbuf->cdata, cc->rlen, cc->clen, cc->private); - if (!len) { - printk_ratelimited("%sF2FS-fs (%s): lz4 compress failed\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id); - return -EIO; - } + if (!len) + return -EAGAIN; + cc->clen = len; return 0; } From 48abe91ac1ad27cd5a5709f983dcf58f2b9a6b70 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 9 May 2020 19:21:35 +0800 Subject: [PATCH 25/51] f2fs: Fix wrong stub helper update_sit_info update_sit_info should be f2fs_update_sit_info, otherwise build fails while no CONFIG_F2FS_STAT_FS. 
Fixes: fc7100ea2a52 ("f2fs: Add f2fs stats to sysfs") Signed-off-by: YueHaibing Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d916540f1281..2a8ea81c52a1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3728,7 +3728,7 @@ static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } static inline void __init f2fs_create_root_stats(void) { } static inline void f2fs_destroy_root_stats(void) { } -static inline void update_sit_info(struct f2fs_sb_info *sbi) {} +static inline void f2fs_update_sit_info(struct f2fs_sb_info *sbi) {} #endif extern const struct file_operations f2fs_dir_operations; From baaa7ebf25c78c5cb712fac16b7f549100beddd3 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 11 May 2020 09:15:18 +0300 Subject: [PATCH 26/51] f2fs: report delalloc reserve as non-free in statfs for project quota This reserved space isn't committed yet but cannot be used for allocations. For userspace it has no difference from used space. See the same fix in ext4 commit f06925c73942 ("ext4: report delalloc reserve as non-free in statfs for project quota"). Fixes: ddc34e328d06 ("f2fs: introduce f2fs_statfs_project") Signed-off-by: Konstantin Khlebnikov Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 582bbf40c559..f6c5c7ec5a12 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1283,7 +1283,8 @@ static int f2fs_statfs_project(struct super_block *sb, limit >>= sb->s_blocksize_bits; if (limit && buf->f_blocks > limit) { - curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; + curblock = (dquot->dq_dqb.dqb_curspace + + dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits; buf->f_blocks = limit; buf->f_bfree = buf->f_bavail = (buf->f_blocks > curblock) ? From 34c061ad85a2f5d5e9e3b045d72f3b211db6e282 Mon Sep 17 00:00:00 2001 From: Sayali Lokhande Date: Thu, 30 Apr 2020 16:28:29 +0530 Subject: [PATCH 27/51] f2fs: Avoid double lock for cp_rwsem during checkpoint There could be a scenario where f2fs_sync_node_pages gets called during checkpoint, which in turn tries to flush inline data and calls iput(). This results in deadlock as iput() tries to hold cp_rwsem, which is already held at the beginning by checkpoint->block_operations(). 
Call stack : Thread A Thread B f2fs_write_checkpoint() - block_operations(sbi) - f2fs_lock_all(sbi); - down_write(&sbi->cp_rwsem); - open() - igrab() - write() write inline data - unlink() - f2fs_sync_node_pages() - if (is_inline_node(page)) - flush_inline_data() - ilookup() page = f2fs_pagecache_get_page() if (!page) goto iput_out; iput_out: -close() -iput() iput(inode); - f2fs_evict_inode() - f2fs_truncate_blocks() - f2fs_lock_op() - down_read(&sbi->cp_rwsem); Fixes: 2049d4fcb057 ("f2fs: avoid multiple node page writes due to inline_data") Signed-off-by: Sayali Lokhande Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 +++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 51 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index d49f7a01d8a2..79e605f38f4f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1168,6 +1168,11 @@ static int block_operations(struct f2fs_sb_info *sbi) }; int err = 0, cnt = 0; + /* + * Let's flush inline_data in dirty node pages. + */ + f2fs_flush_inline_data(sbi); + retry_flush_quotas: f2fs_lock_all(sbi); if (__need_flush_quota(sbi)) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2a8ea81c52a1..7f3d259e7e37 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3282,6 +3282,7 @@ void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); struct page *f2fs_get_node_page_ra(struct page *parent, int start); int f2fs_move_node_page(struct page *node_page, int gc_type); +int f2fs_flush_inline_data(struct f2fs_sb_info *sbi); int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic, unsigned int *seq_id); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1db8cabf727e..e632de10aeda 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1807,6 +1807,53 @@ static bool flush_dirty_inode(struct page *page) return true; } +int f2fs_flush_inline_data(struct f2fs_sb_info *sbi) +{ + pgoff_t index = 0; + struct pagevec pvec; + int nr_pages; + int ret = 0; + + pagevec_init(&pvec); + + while ((nr_pages = pagevec_lookup_tag(&pvec, + NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) { + int i; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + if (!IS_DNODE(page)) + continue; + + lock_page(page); + + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { +continue_unlock: + unlock_page(page); + continue; + } + + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; + } + + /* flush inline_data, if it's async context. */ + if (is_inline_node(page)) { + clear_inline_node(page); + unlock_page(page); + flush_inline_data(sbi, ino_of_node(page)); + continue; + } + unlock_page(page); + } + pagevec_release(&pvec); + cond_resched(); + } + return ret; +} + int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, bool do_balance, enum iostat_type io_type) @@ -1870,8 +1917,8 @@ continue_unlock: goto continue_unlock; } - /* flush inline_data */ - if (is_inline_node(page)) { + /* flush inline_data, if it's async context. 
*/ + if (do_balance && is_inline_node(page)) { clear_inline_node(page); unlock_page(page); flush_inline_data(sbi, ino_of_node(page)); From c75488fb4d82b697f381f855bf5b16779df440aa Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 6 Mar 2020 14:35:33 +0800 Subject: [PATCH 28/51] f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS This patch introduces a new ioctl to rollback all compress inode status: - add reserved blocks in dnode blocks - increase i_compr_blocks, i_blocks, total_valid_block_count - remove immutable flag Then compress inode can be restored to support overwrite functionality again. Signee-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 + fs/f2fs/file.c | 162 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7f3d259e7e37..1d96f733b1b7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -430,6 +430,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_GET_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 17, __u64) #define F2FS_IOC_RELEASE_COMPRESS_BLOCKS \ _IOR(F2FS_IOCTL_MAGIC, 18, __u64) +#define F2FS_IOC_RESERVE_COMPRESS_BLOCKS \ + _IOR(F2FS_IOCTL_MAGIC, 19, __u64) #define F2FS_IOC_GET_VOLUME_NAME FS_IOC_GETFSLABEL #define F2FS_IOC_SET_VOLUME_NAME FS_IOC_SETFSLABEL diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 189f8ce046f0..48d908811807 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3598,6 +3598,165 @@ out: return ret; } +static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); + unsigned int reserved_blocks = 0; + int cluster_size = F2FS_I(dn->inode)->i_cluster_size; + block_t blkaddr; + int i; + + for (i = 0; i < count; i++) { + blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + i); + + if (!__is_valid_data_blkaddr(blkaddr)) + continue; + if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE))) + return -EFSCORRUPTED; + } + + while (count) { + int compr_blocks = 0; + blkcnt_t reserved; + int ret; + + for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { + blkaddr = f2fs_data_blkaddr(dn); + + if (i == 0) { + if (blkaddr == COMPRESS_ADDR) + continue; + dn->ofs_in_node += cluster_size; + goto next; + } + + if (__is_valid_data_blkaddr(blkaddr)) { + compr_blocks++; + continue; + } + + dn->data_blkaddr = NEW_ADDR; + f2fs_set_data_blkaddr(dn); + } + + reserved = cluster_size - compr_blocks; + ret = inc_valid_block_count(sbi, dn->inode, &reserved); + if (ret) + return ret; + + if (reserved != cluster_size - compr_blocks) + return -ENOSPC; + + f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); + + reserved_blocks += reserved; +next: + count -= cluster_size; + } + + return reserved_blocks; +} + +static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + pgoff_t page_idx = 0, last_idx; + unsigned int reserved_blocks = 0; + int ret; + + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + + if (!f2fs_compressed_file(inode)) + return -EINVAL; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (F2FS_I(inode)->i_compr_blocks) + goto out; + + f2fs_balance_fs(F2FS_I_SB(inode), true); + + inode_lock(inode); + + if (!IS_IMMUTABLE(inode)) { + ret = -EINVAL; + goto unlock_inode; + } + + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + 
down_write(&F2FS_I(inode)->i_mmap_sem); + + last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + + while (page_idx < last_idx) { + struct dnode_of_data dn; + pgoff_t end_offset, count; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); + if (ret) { + if (ret == -ENOENT) { + page_idx = f2fs_get_next_page_offset(&dn, + page_idx); + ret = 0; + continue; + } + break; + } + + end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); + count = roundup(count, F2FS_I(inode)->i_cluster_size); + + ret = reserve_compress_blocks(&dn, count); + + f2fs_put_dnode(&dn); + + if (ret < 0) + break; + + page_idx += count; + reserved_blocks += ret; + } + + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + up_write(&F2FS_I(inode)->i_mmap_sem); + + if (ret >= 0) { + F2FS_I(inode)->i_flags &= ~F2FS_IMMUTABLE_FL; + f2fs_set_inode_flags(inode); + inode->i_ctime = current_time(inode); + f2fs_mark_inode_dirty_sync(inode, true); + } +unlock_inode: + inode_unlock(inode); +out: + mnt_drop_write_file(filp); + + if (ret >= 0) { + ret = put_user(reserved_blocks, (u64 __user *)arg); + } else if (reserved_blocks && F2FS_I(inode)->i_compr_blocks) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " + "iblocks=%llu, reserved=%u, compr_blocks=%llu, " + "run fsck to fix.", + __func__, inode->i_ino, inode->i_blocks, + reserved_blocks, + F2FS_I(inode)->i_compr_blocks); + } + + return ret; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -3682,6 +3841,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_get_compress_blocks(filp, arg); case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: return f2fs_release_compress_blocks(filp, arg); + case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: + return f2fs_reserve_compress_blocks(filp, arg); default: return -ENOTTY; } @@ -3850,6 +4011,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_SET_VOLUME_NAME: case F2FS_IOC_GET_COMPRESS_BLOCKS: case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: + case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: break; default: return -ENOIOCTLCMD; From 4fec3fc026717f81e34fca59937b0acbfb05642d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 8 Apr 2020 19:55:17 +0800 Subject: [PATCH 29/51] f2fs: use round_up to enhance calculation .i_cluster_size should be power of 2, so we can use round_up() instead of roundup() to enhance the calculation. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 48d908811807..97cc95a4a8ed 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -742,16 +742,9 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) * for compressed file, only support cluster size * aligned truncation. 
*/ - if (f2fs_compressed_file(inode)) { - size_t cluster_shift = PAGE_SHIFT + - F2FS_I(inode)->i_log_cluster_size; - size_t cluster_mask = (1 << cluster_shift) - 1; - - free_from = from >> cluster_shift; - if (from & cluster_mask) - free_from++; - free_from <<= cluster_shift; - } + if (f2fs_compressed_file(inode)) + free_from = round_up(from, + F2FS_I(inode)->i_cluster_size << PAGE_SHIFT); #endif err = f2fs_do_truncate_blocks(inode, free_from, lock); @@ -3563,7 +3556,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) end_offset = ADDRS_PER_PAGE(dn.node_page, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); - count = roundup(count, F2FS_I(inode)->i_cluster_size); + count = round_up(count, F2FS_I(inode)->i_cluster_size); ret = release_compress_blocks(&dn, count); @@ -3715,7 +3708,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) end_offset = ADDRS_PER_PAGE(dn.node_page, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); - count = roundup(count, F2FS_I(inode)->i_cluster_size); + count = round_up(count, F2FS_I(inode)->i_cluster_size); ret = reserve_compress_blocks(&dn, count); From b4b10061ef98c583bcf82a4200703fbaa98c18dc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 31 Mar 2020 11:43:07 -0700 Subject: [PATCH 30/51] f2fs: refactor resize_fs to avoid meta updates in progress Sahitya raised an issue: - prevent meta updates while checkpoint is in progress allocate_segment_for_resize() can cause metapage updates if it requires to change the current node/data segments for resizing. Stop these meta updates when there is a checkpoint already in progress to prevent inconsistent CP data. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 5 +- fs/f2fs/gc.c | 119 +++++++++++++++++++++--------------- fs/f2fs/super.c | 1 - include/trace/events/f2fs.h | 4 +- 6 files changed, 78 insertions(+), 59 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 79e605f38f4f..620a386d82c1 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1559,7 +1559,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return 0; f2fs_warn(sbi, "Start checkpoint disabled!"); } - mutex_lock(&sbi->cp_mutex); + if (cpc->reason != CP_RESIZE) + mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || @@ -1628,7 +1629,8 @@ stop: f2fs_update_time(sbi, CP_TIME); trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: - mutex_unlock(&sbi->cp_mutex); + if (cpc->reason != CP_RESIZE) + mutex_unlock(&sbi->cp_mutex); return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1d96f733b1b7..338622069e42 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -194,6 +194,7 @@ enum { #define CP_DISCARD 0x00000010 #define CP_TRIMMED 0x00000020 #define CP_PAUSE 0x00000040 +#define CP_RESIZE 0x00000080 #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ @@ -1471,7 +1472,6 @@ struct f2fs_sb_info { unsigned int segs_per_sec; /* segments per section */ unsigned int secs_per_zone; /* sections per zone */ unsigned int total_sections; /* total section count */ - struct mutex resize_mutex; /* for resize exclusion */ unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block 
count */ loff_t max_file_blocks; /* max block index of file */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 97cc95a4a8ed..f7de2a1da528 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3312,7 +3312,6 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) { struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); __u64 block_count; - int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -3324,9 +3323,7 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) sizeof(block_count))) return -EFAULT; - ret = f2fs_resize_fs(sbi, block_count); - - return ret; + return f2fs_resize_fs(sbi, block_count); } static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 28a8c79c8bdc..f3c45ec2a7e7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -1405,12 +1406,29 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } -static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, - unsigned int end) +static int free_segment_range(struct f2fs_sb_info *sbi, + unsigned int secs, bool gc_only) { - int type; - unsigned int segno, next_inuse; + unsigned int segno, next_inuse, start, end; + struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; + int gc_mode, gc_type; int err = 0; + int type; + + /* Force block allocation for GC */ + MAIN_SECS(sbi) -= secs; + start = MAIN_SECS(sbi) * sbi->segs_per_sec; + end = MAIN_SEGS(sbi) - 1; + + mutex_lock(&DIRTY_I(sbi)->seglist_lock); + for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++) + if (SIT_I(sbi)->last_victim[gc_mode] >= start) + SIT_I(sbi)->last_victim[gc_mode] = 0; + + for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++) + if (sbi->next_victim_seg[gc_type] >= start) + sbi->next_victim_seg[gc_type] = NULL_SEGNO; + mutex_unlock(&DIRTY_I(sbi)->seglist_lock); /* Move out cursegs from the target range */ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++) @@ -1423,18 +1441,24 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; - down_write(&sbi->gc_lock); do_garbage_collect(sbi, segno, &gc_list, FG_GC); - up_write(&sbi->gc_lock); put_gc_inode(&gc_list); - if (get_valid_blocks(sbi, segno, true)) - return -EAGAIN; + if (!gc_only && get_valid_blocks(sbi, segno, true)) { + err = -EAGAIN; + goto out; + } + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + goto out; + } } + if (gc_only) + goto out; - err = f2fs_sync_fs(sbi->sb, 1); + err = f2fs_write_checkpoint(sbi, &cpc); if (err) - return err; + goto out; next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start); if (next_inuse <= end) { @@ -1442,6 +1466,8 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, next_inuse); f2fs_bug_on(sbi, 1); } +out: + MAIN_SECS(sbi) += secs; return err; } @@ -1487,6 +1513,7 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs; MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; + MAIN_SECS(sbi) += secs; FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks); @@ -1508,8 +1535,8 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) { 
__u64 old_block_count, shrunk_blocks; + struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; unsigned int secs; - int gc_mode, gc_type; int err = 0; __u32 rem; @@ -1544,10 +1571,27 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) return -EINVAL; } - freeze_bdev(sbi->sb->s_bdev); - shrunk_blocks = old_block_count - block_count; secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); + + /* stop other GC */ + if (!down_write_trylock(&sbi->gc_lock)) + return -EAGAIN; + + /* stop CP to protect MAIN_SEC in free_segment_range */ + f2fs_lock_op(sbi); + err = free_segment_range(sbi, secs, true); + f2fs_unlock_op(sbi); + up_write(&sbi->gc_lock); + if (err) + return err; + + set_sbi_flag(sbi, SBI_IS_RESIZEFS); + + freeze_super(sbi->sb); + down_write(&sbi->gc_lock); + mutex_lock(&sbi->cp_mutex); + spin_lock(&sbi->stat_lock); if (shrunk_blocks + valid_user_blocks(sbi) + sbi->current_reserved_blocks + sbi->unusable_block_count + @@ -1556,69 +1600,44 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) else sbi->user_block_count -= shrunk_blocks; spin_unlock(&sbi->stat_lock); - if (err) { - thaw_bdev(sbi->sb->s_bdev, sbi->sb); - return err; - } - - mutex_lock(&sbi->resize_mutex); - set_sbi_flag(sbi, SBI_IS_RESIZEFS); - - mutex_lock(&DIRTY_I(sbi)->seglist_lock); - - MAIN_SECS(sbi) -= secs; - - for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++) - if (SIT_I(sbi)->last_victim[gc_mode] >= - MAIN_SECS(sbi) * sbi->segs_per_sec) - SIT_I(sbi)->last_victim[gc_mode] = 0; - - for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++) - if (sbi->next_victim_seg[gc_type] >= - MAIN_SECS(sbi) * sbi->segs_per_sec) - sbi->next_victim_seg[gc_type] = NULL_SEGNO; - - mutex_unlock(&DIRTY_I(sbi)->seglist_lock); - - err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec, - MAIN_SEGS(sbi) - 1); if (err) - goto out; + goto out_err; + + err = free_segment_range(sbi, secs, false); + if (err) + goto recover_out; update_sb_metadata(sbi, -secs); err = f2fs_commit_super(sbi, false); if (err) { update_sb_metadata(sbi, secs); - goto out; + goto recover_out; } - mutex_lock(&sbi->cp_mutex); update_fs_metadata(sbi, -secs); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); set_sbi_flag(sbi, SBI_IS_DIRTY); - mutex_unlock(&sbi->cp_mutex); - err = f2fs_sync_fs(sbi->sb, 1); + err = f2fs_write_checkpoint(sbi, &cpc); if (err) { - mutex_lock(&sbi->cp_mutex); update_fs_metadata(sbi, secs); - mutex_unlock(&sbi->cp_mutex); update_sb_metadata(sbi, secs); f2fs_commit_super(sbi, false); } -out: +recover_out: if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_err(sbi, "resize_fs failed, should run fsck to repair!"); - MAIN_SECS(sbi) += secs; spin_lock(&sbi->stat_lock); sbi->user_block_count += shrunk_blocks; spin_unlock(&sbi->stat_lock); } +out_err: + mutex_unlock(&sbi->cp_mutex); + up_write(&sbi->gc_lock); + thaw_super(sbi->sb); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); - mutex_unlock(&sbi->resize_mutex); - thaw_bdev(sbi->sb->s_bdev, sbi->sb); return err; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f6c5c7ec5a12..441eaaf9739c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3408,7 +3408,6 @@ try_onemore: init_rwsem(&sbi->gc_lock); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); - mutex_init(&sbi->resize_mutex); init_rwsem(&sbi->node_write); init_rwsem(&sbi->node_change); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 757d3d6031e6..4dbcdc6d2738 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -50,6 +50,7 @@ TRACE_DEFINE_ENUM(CP_RECOVERY); 
TRACE_DEFINE_ENUM(CP_DISCARD); TRACE_DEFINE_ENUM(CP_TRIMMED); TRACE_DEFINE_ENUM(CP_PAUSE); +TRACE_DEFINE_ENUM(CP_RESIZE); #define show_block_type(type) \ __print_symbolic(type, \ @@ -126,7 +127,8 @@ TRACE_DEFINE_ENUM(CP_PAUSE); { CP_RECOVERY, "Recovery" }, \ { CP_DISCARD, "Discard" }, \ { CP_PAUSE, "Pause" }, \ - { CP_TRIMMED, "Trimmed" }) + { CP_TRIMMED, "Trimmed" }, \ + { CP_RESIZE, "Resize" }) #define show_fsync_cpreason(type) \ __print_symbolic(type, \ From deaf160f8aa767f1b2cd53f1834c1a4126815631 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 20 Apr 2020 23:00:57 +0100 Subject: [PATCH 31/51] f2fs: remove redundant assignment to variable err The variable err is being assigned with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 58db1de7ca94..63b34a161cf4 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -564,7 +564,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) struct inode *inode = d_inode(dentry); struct f2fs_dir_entry *de; struct page *page; - int err = -ENOENT; + int err; trace_f2fs_unlink_enter(dir, dentry); From 03382f1aa99f438f8e9dc7562c7368f55ba8f56c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 21 Apr 2020 19:36:21 +0800 Subject: [PATCH 32/51] f2fs: compress: don't handle non-compressed data in workqueue If bio has no compressed data, we don't need to handle end_io work in workqueue, instead, it should just let interrupter handle it directly to speed up IO response. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ff8a92df8d99..8607c02e8f96 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -114,7 +114,8 @@ static enum count_type __read_io_type(struct page *page) /* postprocessing steps for read bios */ enum bio_post_read_step { STEP_DECRYPT, - STEP_DECOMPRESS, + STEP_DECOMPRESS_NOWQ, /* handle normal cluster data inplace */ + STEP_DECOMPRESS, /* handle compressed cluster data in workqueue */ STEP_VERITY, }; @@ -990,7 +991,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, if (f2fs_encrypted_file(inode)) post_read_steps |= 1 << STEP_DECRYPT; if (f2fs_compressed_file(inode)) - post_read_steps |= 1 << STEP_DECOMPRESS; + post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ; if (f2fs_need_verity(inode, first_idx)) post_read_steps |= 1 << STEP_VERITY; @@ -2189,6 +2190,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, for (i = 0; i < dic->nr_cpages; i++) { struct page *page = dic->cpages[i]; block_t blkaddr; + struct bio_post_read_ctx *ctx; blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i + 1); @@ -2225,6 +2227,11 @@ submit_and_realloc: if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; + /* tag STEP_DECOMPRESS to handle IO in wq */ + ctx = bio->bi_private; + if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS))) + ctx->enabled_steps |= 1 << STEP_DECOMPRESS; + inc_page_count(sbi, F2FS_RD_DATA); f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); ClearPageError(page); From f3494345ce9999624b36109252a4bf5f00e51a46 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 23 Apr 2020 17:57:33 +0800 Subject: 
[PATCH 33/51] f2fs: fix potential use-after-free issue In error path of f2fs_read_multi_pages(), it should let last referrer release decompress io context memory, otherwise, other referrer will cause use-after-free issue. Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8607c02e8f96..4d871d27a85f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2208,16 +2208,16 @@ submit_and_realloc: page->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - bio = NULL; dic->failed = true; if (refcount_sub_and_test(dic->nr_cpages - i, - &dic->ref)) + &dic->ref)) { f2fs_decompress_end_io(dic->rpages, cc->cluster_size, true, false); - f2fs_free_dic(dic); + f2fs_free_dic(dic); + } f2fs_put_dnode(&dn); - *bio_ret = bio; + *bio_ret = NULL; return ret; } } From 9c1223845a37ce09fd498b8c8ed061decff20eda Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 23 Apr 2020 18:03:06 +0800 Subject: [PATCH 34/51] f2fs: add compressed/gc data read IO stat in order to account data read IOs more accurately. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 + fs/f2fs/f2fs.h | 2 ++ fs/f2fs/gc.c | 2 ++ fs/f2fs/sysfs.c | 7 +++++++ include/trace/events/f2fs.h | 11 ++++++++--- 5 files changed, 20 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4d871d27a85f..48a622b95b76 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2234,6 +2234,7 @@ submit_and_realloc: inc_page_count(sbi, F2FS_RD_DATA); f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE); ClearPageError(page); *last_block_in_bio = blkaddr; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 338622069e42..51863e4f5d4e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1149,6 +1149,8 @@ enum iostat_type { APP_READ_IO, /* app read IOs */ APP_MAPPED_READ_IO, /* app mapped read IOs */ FS_DATA_READ_IO, /* data read IOs */ + FS_GDATA_READ_IO, /* data read IOs from background gc */ + FS_CDATA_READ_IO, /* compressed data read IOs */ FS_NODE_READ_IO, /* node read IOs */ FS_META_READ_IO, /* meta read IOs */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f3c45ec2a7e7..5b95d5a146eb 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -740,6 +740,7 @@ got_it: f2fs_put_page(page, 1); f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE); return 0; put_encrypted_page: @@ -846,6 +847,7 @@ static int move_data_block(struct inode *inode, block_t bidx, } f2fs_update_iostat(fio.sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(fio.sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE); lock_page(mpage); if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index eaf8088548f0..a117ae1f9d5f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -803,6 +803,7 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, seq_printf(seq, "time: %-16llu\n", now); /* print app write IOs */ + seq_puts(seq, "[WRITE]\n"); seq_printf(seq, "app buffered: %-16llu\n", sbi->rw_iostat[APP_BUFFERED_IO]); seq_printf(seq, "app direct: %-16llu\n", @@ -829,6 +830,7 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, sbi->rw_iostat[FS_CP_META_IO]); /* print app read IOs */ + seq_puts(seq, "[READ]\n"); seq_printf(seq, "app buffered: %-16llu\n", sbi->rw_iostat[APP_BUFFERED_READ_IO]); 
seq_printf(seq, "app direct: %-16llu\n", @@ -839,12 +841,17 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, /* print fs read IOs */ seq_printf(seq, "fs data: %-16llu\n", sbi->rw_iostat[FS_DATA_READ_IO]); + seq_printf(seq, "fs gc data: %-16llu\n", + sbi->rw_iostat[FS_GDATA_READ_IO]); + seq_printf(seq, "fs compr_data: %-16llu\n", + sbi->rw_iostat[FS_CDATA_READ_IO]); seq_printf(seq, "fs node: %-16llu\n", sbi->rw_iostat[FS_NODE_READ_IO]); seq_printf(seq, "fs meta: %-16llu\n", sbi->rw_iostat[FS_META_READ_IO]); /* print other IOs */ + seq_puts(seq, "[OTHER]\n"); seq_printf(seq, "fs discard: %-16llu\n", sbi->rw_iostat[FS_DISCARD]); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 4dbcdc6d2738..a8e2e83c91ce 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1840,6 +1840,8 @@ TRACE_EVENT(f2fs_iostat, __field(unsigned long long, app_rio) __field(unsigned long long, app_mrio) __field(unsigned long long, fs_drio) + __field(unsigned long long, fs_gdrio) + __field(unsigned long long, fs_cdrio) __field(unsigned long long, fs_nrio) __field(unsigned long long, fs_mrio) __field(unsigned long long, fs_discard) @@ -1864,6 +1866,8 @@ TRACE_EVENT(f2fs_iostat, __entry->app_rio = iostat[APP_READ_IO]; __entry->app_mrio = iostat[APP_MAPPED_READ_IO]; __entry->fs_drio = iostat[FS_DATA_READ_IO]; + __entry->fs_gdrio = iostat[FS_GDATA_READ_IO]; + __entry->fs_cdrio = iostat[FS_CDATA_READ_IO]; __entry->fs_nrio = iostat[FS_NODE_READ_IO]; __entry->fs_mrio = iostat[FS_META_READ_IO]; __entry->fs_discard = iostat[FS_DISCARD]; @@ -1875,15 +1879,16 @@ TRACE_EVENT(f2fs_iostat, "gc [data=%llu, node=%llu], " "cp [data=%llu, node=%llu, meta=%llu], " "app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], " - "fs [data=%llu, node=%llu, meta=%llu]", + "fs [data=%llu, (gc_data=%llu, compr_data=%llu), " + "node=%llu, meta=%llu]", show_dev(__entry->dev), __entry->app_wio, __entry->app_dio, __entry->app_bio, __entry->app_mio, __entry->fs_dio, __entry->fs_nio, __entry->fs_mio, __entry->fs_discard, __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio, __entry->fs_cp_nio, __entry->fs_cp_mio, __entry->app_rio, __entry->app_drio, __entry->app_brio, - __entry->app_mrio, __entry->fs_drio, __entry->fs_nrio, - __entry->fs_mrio) + __entry->app_mrio, __entry->fs_drio, __entry->fs_gdrio, + __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio) ); #endif /* _TRACE_F2FS_H */ From 1454c978efbb57b052670d50023f48c759d704ce Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 May 2020 09:16:03 +0800 Subject: [PATCH 35/51] f2fs: compress: fix zstd data corruption During zstd compression, ZSTD_endStream() may return non-zero value because distination buffer is full, but there is still compressed data remained in intermediate buffer, it means that zstd algorithm can not save at last one block space, let's just writeback raw data instead of compressed one, this can fix data corruption when decompressing incomplete stored compression data. 
Fixes: 50cfa66f0de0 ("f2fs: compress: support zstd compress algorithm") Signed-off-by: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index dcedf008f359..bf152c0d79fe 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -360,6 +360,13 @@ static int zstd_compress_pages(struct compress_ctx *cc) return -EIO; } + /* + * there is compressed data remained in intermediate buffer due to + * no more space in cbuf.cdata + */ + if (ret) + return -EAGAIN; + cc->clen = outbuf.pos; return 0; } From 1ae18f71cb522684bac1718f5c188fb5e30eb23d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 15 May 2020 17:20:50 -0700 Subject: [PATCH 36/51] f2fs: fix checkpoint=disable:%u%% When parsing the mount option, we don't have sbi->user_block_count. Should do it after getting it. Cc: Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 25 +++++++++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 51863e4f5d4e..fb180020e175 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -139,6 +139,7 @@ struct f2fs_mount_info { int fs_mode; /* fs mode: LFS or ADAPTIVE */ int bggc_mode; /* bggc mode: off, on or sync */ bool test_dummy_encryption; /* test dummy encryption */ + block_t unusable_cap_perc; /* percentage for cap */ block_t unusable_cap; /* Amount of space allowed to be * unusable when disabling checkpoint */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 441eaaf9739c..a71da699cb2d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -284,6 +284,22 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).s_resgid)); } +static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi) +{ + if (!F2FS_OPTION(sbi).unusable_cap_perc) + return; + + if (F2FS_OPTION(sbi).unusable_cap_perc == 100) + F2FS_OPTION(sbi).unusable_cap = sbi->user_block_count; + else + F2FS_OPTION(sbi).unusable_cap = (sbi->user_block_count / 100) * + F2FS_OPTION(sbi).unusable_cap_perc; + + f2fs_info(sbi, "Adjust unusable cap for checkpoint=disable = %u / %u%%", + F2FS_OPTION(sbi).unusable_cap, + F2FS_OPTION(sbi).unusable_cap_perc); +} + static void init_once(void *foo) { struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; @@ -785,12 +801,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; if (arg < 0 || arg > 100) return -EINVAL; - if (arg == 100) - F2FS_OPTION(sbi).unusable_cap = - sbi->user_block_count; - else - F2FS_OPTION(sbi).unusable_cap = - (sbi->user_block_count / 100) * arg; + F2FS_OPTION(sbi).unusable_cap_perc = arg; set_opt(sbi, DISABLE_CHECKPOINT); break; case Opt_checkpoint_disable_cap: @@ -1840,6 +1851,7 @@ skip: (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); limit_reserve_root(sbi); + adjust_unusable_cap_perc(sbi); *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); return 0; restore_gc: @@ -3516,6 +3528,7 @@ try_onemore: sbi->reserved_blocks = 0; sbi->current_reserved_blocks = 0; limit_reserve_root(sbi); + adjust_unusable_cap_perc(sbi); for (i = 0; i < NR_INODE_TYPE; i++) { INIT_LIST_HEAD(&sbi->inode_list[i]); From 9c30df7c5a304e383592587cb2a41bb2ab0ac80d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 May 2020 21:12:53 -0700 Subject: [PATCH 37/51] f2fs: flush dirty meta pages when flushing them Let's guarantee flusing dirty meta pages to avoid infinite loop. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 620a386d82c1..3dc3ac6fe143 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1266,6 +1266,9 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) if (unlikely(f2fs_cp_error(sbi))) break; + if (type == F2FS_DIRTY_META) + f2fs_sync_meta_pages(sbi, META, LONG_MAX, + FS_CP_META_IO); io_schedule_timeout(DEFAULT_IO_TIMEOUT); } finish_wait(&sbi->cp_wait, &wait); From 6d7c865c2714b122a940774990cfb1d87b57294a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 18 May 2020 18:00:33 -0700 Subject: [PATCH 38/51] f2fs: avoid infinite loop to wait for flushing node pages at cp_error Shutdown test sometimes hangs, since it keeps trying to flush dirty node pages in an infinite loop. Let's drop dirty pages at umount in that case. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e632de10aeda..e0bb0f7e0506 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1520,8 +1520,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, trace_f2fs_writepage(page, NODE); - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) { + ClearPageUptodate(page); + dec_page_count(sbi, F2FS_DIRTY_NODES); + unlock_page(page); + return 0; + } goto redirty_out; + } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; From 195f406543e555611330b98ea8c21f113a192fc3 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 26 May 2020 17:05:43 +0800 Subject: [PATCH 39/51] f2fs: code cleanup by removing ifdef macro surrounding Define f2fs_listxattr as NULL when CONFIG_F2FS_FS_XATTR is not enabled; then we can remove many ugly ifdef macros from the code.
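The cleanup leans on a common C idiom that is worth spelling out: when a feature is compiled out, the optional callback's name is #define'd to NULL in the header, so every ops table can reference it unconditionally and the #ifdef/#endif pairs around the initializers disappear. A self-contained sketch of the pattern (CONFIG_MYFS_XATTR, myfs_listxattr and struct myfs_ops are illustrative names, not f2fs's):

/* Sketch of the "define the symbol to NULL" idiom used by this cleanup. */
#include <stddef.h>

struct myfs_ops {
	long (*listxattr)(const char *path);	/* optional method */
};

#ifdef CONFIG_MYFS_XATTR
long myfs_listxattr(const char *path);
#else
/* Feature compiled out: the bare name now expands to NULL ... */
#define myfs_listxattr NULL
#endif

/* ... so this initializer builds either way, with no #ifdef around it. */
static const struct myfs_ops myfs_file_ops = {
	.listxattr = myfs_listxattr,
};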
Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 -- fs/f2fs/namei.c | 8 -------- fs/f2fs/xattr.h | 6 +----- 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f7de2a1da528..67fd1c900eb4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -979,9 +979,7 @@ const struct inode_operations f2fs_file_inode_operations = { .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, .set_acl = f2fs_set_acl, -#ifdef CONFIG_F2FS_FS_XATTR .listxattr = f2fs_listxattr, -#endif .fiemap = f2fs_fiemap, }; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 63b34a161cf4..8f55201441f7 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1287,9 +1287,7 @@ const struct inode_operations f2fs_encrypted_symlink_inode_operations = { .get_link = f2fs_encrypted_get_link, .getattr = f2fs_getattr, .setattr = f2fs_setattr, -#ifdef CONFIG_F2FS_FS_XATTR .listxattr = f2fs_listxattr, -#endif }; const struct inode_operations f2fs_dir_inode_operations = { @@ -1307,9 +1305,7 @@ const struct inode_operations f2fs_dir_inode_operations = { .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, .set_acl = f2fs_set_acl, -#ifdef CONFIG_F2FS_FS_XATTR .listxattr = f2fs_listxattr, -#endif .fiemap = f2fs_fiemap, }; @@ -1317,9 +1313,7 @@ const struct inode_operations f2fs_symlink_inode_operations = { .get_link = f2fs_get_link, .getattr = f2fs_getattr, .setattr = f2fs_setattr, -#ifdef CONFIG_F2FS_FS_XATTR .listxattr = f2fs_listxattr, -#endif }; const struct inode_operations f2fs_special_inode_operations = { @@ -1327,7 +1321,5 @@ const struct inode_operations f2fs_special_inode_operations = { .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, .set_acl = f2fs_set_acl, -#ifdef CONFIG_F2FS_FS_XATTR .listxattr = f2fs_listxattr, -#endif }; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 6a192e6c7a9e..416d652774a3 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -136,6 +136,7 @@ extern void f2fs_destroy_xattr_caches(struct f2fs_sb_info *); #else #define f2fs_xattr_handlers NULL +#define f2fs_listxattr NULL static inline int f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, struct page *page, int flags) @@ -148,11 +149,6 @@ static inline int f2fs_getxattr(struct inode *inode, int index, { return -EOPNOTSUPP; } -static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, - size_t buffer_size) -{ - return -EOPNOTSUPP; -} static inline int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } #endif From fd6126484ceaa9d94db196931c10454090f3d677 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Wed, 27 May 2020 13:02:31 +0900 Subject: [PATCH 40/51] f2fs: protect new segment allocation in expand_inode_data Found a new segemnt allocation without f2fs_lock_op() in expand_inode_data(). So, when we do fallocate() for a pinned file and trigger checkpoint very frequently and simultaneously. F2FS gets stuck in the below code of do_checkpoint() forever. f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); /* Wait for all dirty meta pages to be submitted for IO */ <= if fallocate() here, f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META); <= it'll wait forever. 
Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 67fd1c900eb4..dfa1ac2d751a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1658,7 +1658,11 @@ next_alloc: down_write(&sbi->pin_sem); map.m_seg_type = CURSEG_COLD_DATA_PINNED; + + f2fs_lock_op(sbi); f2fs_allocate_new_segments(sbi, CURSEG_COLD_DATA); + f2fs_unlock_op(sbi); + err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); up_write(&sbi->pin_sem); From 84597b1f9b051ff75a3471c0331b6875e94b1f7e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 27 May 2020 18:27:51 +0800 Subject: [PATCH 41/51] f2fs: fix wrong value of tracepoint parameter In f2fs_lookup(), we should set @err correctly before printing it in tracepoint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 8f55201441f7..e94e02c6580a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -504,6 +504,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, err = PTR_ERR(page); goto out; } + err = -ENOENT; goto out_splice; } @@ -549,7 +550,7 @@ out_splice: #endif new = d_splice_alias(inode, dentry); err = PTR_ERR_OR_ZERO(new); - trace_f2fs_lookup_end(dir, dentry, ino, err); + trace_f2fs_lookup_end(dir, dentry, ino, !new ? -ENOENT : err); return new; out_iput: iput(inode); From 47d0d7d76437ca9f48314844bc44dd08615302a1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 27 May 2020 18:27:52 +0800 Subject: [PATCH 42/51] f2fs: remove unneeded return value of __insert_discard_tree() We never use return value of __insert_discard_tree(), so remove it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1c48ec866b8c..ebbadde6cbce 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1221,7 +1221,7 @@ submit: return err; } -static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, +static void __insert_discard_tree(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len, struct rb_node **insert_p, @@ -1230,7 +1230,6 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct rb_node **p; struct rb_node *parent = NULL; - struct discard_cmd *dc = NULL; bool leftmost = true; if (insert_p && insert_parent) { @@ -1242,12 +1241,8 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart, &leftmost); do_insert: - dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, + __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p, leftmost); - if (!dc) - return NULL; - - return dc; } static void __relocate_discard_cmd(struct discard_cmd_control *dcc, From dc35d73a42291b2a68f5b9a12b2b095b82194c1f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 26 May 2020 09:55:02 +0800 Subject: [PATCH 43/51] f2fs: compress: don't compress any datas after cp stop While compressed data writeback, we need to drop dirty pages like we did for non-compressed pages if cp stops, however it's not needed to compress any data in such case, so let's detect cp stop condition in cluster_may_compress() to avoid redundant compressing and let following 
f2fs_write_raw_pages() drops dirty pages correctly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index bf152c0d79fe..a53578a89211 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -849,6 +849,8 @@ static bool cluster_may_compress(struct compress_ctx *cc) return false; if (!f2fs_cluster_is_full(cc)) return false; + if (unlikely(f2fs_cp_error(F2FS_I_SB(cc->inode)))) + return false; return __cluster_may_compress(cc); } From ca7f76e680745d3b8a386638045f85dac1c4b2f4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 29 May 2020 17:29:47 +0800 Subject: [PATCH 44/51] f2fs: fix wrong discard space Under heavy fsstress, we may triggle panic while issuing discard, because __check_sit_bitmap() detects that discard command may earse valid data blocks, the root cause is as below race stack described, since we removed lock when flushing quota data, quota data writeback may race with write_checkpoint(), so that it causes inconsistency in between cached discard entry and segment bitmap. - f2fs_write_checkpoint - block_operations - set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH) - f2fs_flush_sit_entries - add_discard_addrs - __set_bit_le(i, (void *)de->discard_map); - f2fs_write_data_pages - f2fs_write_single_data_page : inode is quota one, cp_rwsem won't be locked - f2fs_do_write_data_page - f2fs_allocate_data_block - f2fs_wait_discard_bio : discard entry has not been added yet. - update_sit_entry - f2fs_clear_prefree_segments - f2fs_issue_discard : add discard entry In order to fix this, this patch uses node_write to serialize f2fs_allocate_data_block and checkpoint. Fixes: 435cbab95e39 ("f2fs: fix quota_sync failure due to f2fs_lock_op") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ebbadde6cbce..196f31503511 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3107,6 +3107,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, type = CURSEG_COLD_DATA; } + /* + * We need to wait for node_write to avoid block allocation during + * checkpoint. This can only happen to quota writes which can cause + * the below discard race condition. + */ + if (IS_DATASEG(type)) + down_write(&sbi->node_write); + down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); @@ -3172,6 +3180,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, up_read(&SM_I(sbi)->curseg_lock); + if (IS_DATASEG(type)) + up_write(&sbi->node_write); + if (put_pin_sem) up_read(&sbi->pin_sem); } From e78790f84a5417287965a06cd4dea85df0743935 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 2 Jun 2020 18:11:47 +0530 Subject: [PATCH 45/51] f2fs: fix retry logic in f2fs_write_cache_pages() In case a compressed file is getting overwritten, the current retry logic doesn't include the current page to be retried now as it sets the new start index as 0 and new end index as writeback_index - 1. This causes the corresponding cluster to be uncompressed and written as normal pages without compression. Fix this by allowing writeback to be retried for the current page as well (in case of compressed page getting retried due to index mismatch with cluster index). So that this cluster can be written compressed in case of overwrite. 
Also, align f2fs_write_cache_pages() according to the change - <64081362e8ff>("mm/page-writeback.c: fix range_cyclic writeback vs writepages deadlock"). Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 48a622b95b76..a65bfc07ddb9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2861,7 +2861,6 @@ static int f2fs_write_cache_pages(struct address_space *mapping, pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; - int cycled; int range_whole = 0; xa_mark_t tag; int nwritten = 0; @@ -2879,17 +2878,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping, if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; - if (index == 0) - cycled = 1; - else - cycled = 0; end = -1; } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; - cycled = 1; /* ignore range_cyclic tests */ } if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag = PAGECACHE_TAG_TOWRITE; @@ -3054,12 +3048,13 @@ next: } } #endif - if ((!cycled && !done) || retry) { - cycled = 1; + if (retry) { index = 0; - end = writeback_index - 1; + end = -1; goto retry; } + if (wbc->range_cyclic && !done) + done_index = 0; if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; From 0b6d4ca04a86b9dababbb76e58d33c437e127b77 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 4 Jun 2020 21:57:48 -0700 Subject: [PATCH 46/51] f2fs: don't return vmalloc() memory from f2fs_kmalloc() kmalloc() returns kmalloc'ed memory, and kvmalloc() returns either kmalloc'ed or vmalloc'ed memory. But the f2fs wrappers, f2fs_kmalloc() and f2fs_kvmalloc(), both return both kinds of memory. It's redundant to have two functions that do the same thing, and also breaking the standard naming convention is causing bugs since people assume it's safe to kfree() memory allocated by f2fs_kmalloc(). See e.g. the various allocations in fs/f2fs/compress.c. Fix this by making f2fs_kmalloc() just use kmalloc(). And to avoid re-introducing the allocation failures that the vmalloc fallback was intended to fix, convert the largest allocations to use f2fs_kvmalloc(). 
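The renaming matters because the two allocator families must not be mixed when freeing. A short kernel-style sketch of the invariant the patch restores (illustrative code, not taken from f2fs; error handling trimmed):

/* Sketch: pair each allocation with the free routine from the same family. */
#include <linux/slab.h>
#include <linux/mm.h>

static int alloc_example(size_t small, size_t maybe_large)
{
	void *a = kmalloc(small, GFP_KERNEL);		/* always slab memory */
	void *b = kvmalloc(maybe_large, GFP_KERNEL);	/* slab OR vmalloc    */

	if (!a || !b)
		goto out;
	/* ... use a and b ... */
out:
	kfree(a);	/* kmalloc() memory goes back through kfree()         */
	kvfree(b);	/* kvmalloc() memory must go through kvfree();        */
			/* plain kfree(b) is a bug whenever b was vmalloc'ed  */
	return 0;
}

Under the old behaviour f2fs_kmalloc() could hand out either kind of memory, so a kfree() of its result was only accidentally correct.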
Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/f2fs.h | 8 +------- fs/f2fs/node.c | 8 ++++---- fs/f2fs/super.c | 2 +- 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3dc3ac6fe143..236064930251 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -895,8 +895,8 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) int i; int err; - sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks), - GFP_KERNEL); + sbi->ckpt = f2fs_kvzalloc(sbi, array_size(blk_size, cp_blks), + GFP_KERNEL); if (!sbi->ckpt) return -ENOMEM; /* diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fb180020e175..7794e2f0d6e8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2997,18 +2997,12 @@ static inline bool f2fs_may_extent_tree(struct inode *inode) static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { - void *ret; - if (time_to_inject(sbi, FAULT_KMALLOC)) { f2fs_show_injection_info(sbi, FAULT_KMALLOC); return NULL; } - ret = kmalloc(size, flags); - if (ret) - return ret; - - return kvmalloc(size, flags); + return kmalloc(size, flags); } static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e0bb0f7e0506..03e24df1c84f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2993,7 +2993,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) return 0; nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); - nm_i->nat_bits = f2fs_kzalloc(sbi, + nm_i->nat_bits = f2fs_kvzalloc(sbi, nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); if (!nm_i->nat_bits) return -ENOMEM; @@ -3126,9 +3126,9 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) int i; nm_i->free_nid_bitmap = - f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *), - nm_i->nat_blocks), - GFP_KERNEL); + f2fs_kvzalloc(sbi, array_size(sizeof(unsigned char *), + nm_i->nat_blocks), + GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a71da699cb2d..f3c151169542 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3033,7 +3033,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) if (nr_sectors & (bdev_zone_sectors(bdev) - 1)) FDEV(devi).nr_blkz++; - FDEV(devi).blkz_seq = f2fs_kzalloc(sbi, + FDEV(devi).blkz_seq = f2fs_kvzalloc(sbi, BITS_TO_LONGS(FDEV(devi).nr_blkz) * sizeof(unsigned long), GFP_KERNEL); From fc3bb095ab02b9e7d89a069ade2cead15c64c504 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 1 Jun 2020 13:08:05 -0700 Subject: [PATCH 47/51] f2fs: avoid utf8_strncasecmp() with unstable name If the dentry name passed to ->d_compare() fits in dentry::d_iname, then it may be concurrently modified by a rename. This can cause undefined behavior (possibly out-of-bounds memory accesses or crashes) in utf8_strncasecmp(), since fs/unicode/ isn't written to handle strings that may be concurrently modified. Fix this by first copying the filename to a stack buffer if needed. This way we get a stable snapshot of the filename. 
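In condensed form this is the classic "snapshot before you compare" pattern (the sketch below is an editorial illustration with made-up names and BUFLEN, not the VFS code in the hunk that follows): copy the possibly-racing bytes into a private stack buffer first, so the comparison routine only ever sees one consistent string. In the real hunk the copy is taken only for names short enough to be stored inline (len <= DNAME_INLINE_LEN - 1, leaving a byte for the NUL terminator in the DNAME_INLINE_LEN-sized buffer), because those are exactly the names a concurrent rename can rewrite in place.

/* Sketch: take a stable snapshot of a name another thread may be changing. */
#include <string.h>

#define BUFLEN 40	/* stand-in for DNAME_INLINE_LEN */

static int compare_snapshot(const char *unstable, size_t len, const char *wanted)
{
	char snap[BUFLEN];

	if (len > BUFLEN - 1)
		return 1;		/* long names handled elsewhere in the sketch */
	memcpy(snap, unstable, len);	/* one consistent copy of the bytes ...       */
	snap[len] = '\0';		/* ... NUL-terminated in our own buffer       */
	return strcmp(snap, wanted);	/* the comparison never sees a torn name      */
}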
Fixes: 2c2eb7a300cd ("f2fs: Support case-insensitive file name lookups") Cc: # v5.4+ Cc: Al Viro Cc: Daniel Rosenberg Cc: Gabriel Krisman Bertazi Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 29f70f2295cc..d35976785e8c 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1114,11 +1114,27 @@ static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, const struct inode *dir = READ_ONCE(parent->d_inode); const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); struct qstr entry = QSTR_INIT(str, len); + char strbuf[DNAME_INLINE_LEN]; int res; if (!dir || !IS_CASEFOLDED(dir)) goto fallback; + /* + * If the dentry name is stored in-line, then it may be concurrently + * modified by a rename. If this happens, the VFS will eventually retry + * the lookup, so it doesn't matter what ->d_compare() returns. + * However, it's unsafe to call utf8_strncasecmp() with an unstable + * string. Therefore, we have to copy the name into a temporary buffer. + */ + if (len <= DNAME_INLINE_LEN - 1) { + memcpy(strbuf, str, len); + strbuf[len] = 0; + entry.name = strbuf; + /* prevent compiler from optimizing out the temporary buffer */ + barrier(); + } + res = utf8_strncasecmp(sbi->s_encoding, name, &entry); if (res >= 0) return res; From 8626441f05dc45a2f4693ee6863d02456ce39e60 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Jun 2020 20:03:16 +0800 Subject: [PATCH 48/51] f2fs: handle readonly filesystem in f2fs_ioc_shutdown() If mountpoint is readonly, we should allow shutdowning filesystem successfully, this fixes issue found by generic/599 testcase of xfstest. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index dfa1ac2d751a..3268f8dd59bb 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2232,8 +2232,15 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) if (in != F2FS_GOING_DOWN_FULLSYNC) { ret = mnt_want_write_file(filp); - if (ret) + if (ret) { + if (ret == -EROFS) { + ret = 0; + f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); + trace_f2fs_shutdown(sbi, in, ret); + } return ret; + } } switch (in) { From bc67c5d0ce406522cc846159f6a0dc0180be58d6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Jun 2020 20:03:17 +0800 Subject: [PATCH 49/51] f2fs: remove unused parameter of f2fs_put_rpages_mapping() Just cleanup, no logic change. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index a53578a89211..1e02a8c106b0 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -89,8 +89,7 @@ static void f2fs_unlock_rpages(struct compress_ctx *cc, int len) f2fs_drop_rpages(cc, len, true); } -static void f2fs_put_rpages_mapping(struct compress_ctx *cc, - struct address_space *mapping, +static void f2fs_put_rpages_mapping(struct address_space *mapping, pgoff_t start, int len) { int i; @@ -942,7 +941,7 @@ retry: if (!PageUptodate(page)) { f2fs_unlock_rpages(cc, i + 1); - f2fs_put_rpages_mapping(cc, mapping, start_idx, + f2fs_put_rpages_mapping(mapping, start_idx, cc->cluster_size); f2fs_destroy_compress_ctx(cc); goto retry; @@ -977,7 +976,7 @@ retry: unlock_pages: f2fs_unlock_rpages(cc, i); release_pages: - f2fs_put_rpages_mapping(cc, mapping, start_idx, i); + f2fs_put_rpages_mapping(mapping, start_idx, i); f2fs_destroy_compress_ctx(cc); return ret; } From 32b6aba85c8dbd1d3a155543986574c6969f0a48 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 4 Jun 2020 11:49:43 -0700 Subject: [PATCH 50/51] f2fs: add node_io_flag for bio flags likewise data_io_flag This patch adds another way to attach bio flags to node writes. Description: Give a way to attach REQ_META|FUA to node writes given temperature-based bits. Now the bits indicate: * REQ_META | REQ_FUA | * 5 | 4 | 3 | 2 | 1 | 0 | * Cold | Warm | Hot | Cold | Warm | Hot | Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 9 +++++++++ fs/f2fs/data.c | 24 +++++++++++++++--------- fs/f2fs/f2fs.h | 1 + fs/f2fs/sysfs.c | 2 ++ 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 427f5b45c67f..4bb93a06d8ab 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -333,6 +333,15 @@ Description: Give a way to attach REQ_META|FUA to data writes * 5 | 4 | 3 | 2 | 1 | 0 | * Cold | Warm | Hot | Cold | Warm | Hot | +What: /sys/fs/f2fs//node_io_flag +Date: June 2020 +Contact: "Jaegeuk Kim" +Description: Give a way to attach REQ_META|FUA to node writes + given temperature-based bits. 
Now the bits indicate: + * REQ_META | REQ_FUA | + * 5 | 4 | 3 | 2 | 1 | 0 | + * Cold | Warm | Hot | Cold | Warm | Hot | + What: /sys/fs/f2fs//iostat_period_ms Date: April 2020 Contact: "Daeho Jeong" diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a65bfc07ddb9..798c325a820d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -514,22 +514,28 @@ void f2fs_submit_bio(struct f2fs_sb_info *sbi, __submit_bio(sbi, bio, type); } -static void __attach_data_io_flag(struct f2fs_io_info *fio) +static void __attach_io_flag(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1; - unsigned int fua_flag = sbi->data_io_flag & temp_mask; - unsigned int meta_flag = (sbi->data_io_flag >> NR_TEMP_TYPE) & - temp_mask; + unsigned int io_flag, fua_flag, meta_flag; + + if (fio->type == DATA) + io_flag = sbi->data_io_flag; + else if (fio->type == NODE) + io_flag = sbi->node_io_flag; + else + return; + + fua_flag = io_flag & temp_mask; + meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask; + /* - * data io flag bits per temp: + * data/node io flag bits per temp: * REQ_META | REQ_FUA | * 5 | 4 | 3 | 2 | 1 | 0 | * Cold | Warm | Hot | Cold | Warm | Hot | */ - if (fio->type != DATA) - return; - if ((1 << fio->temp) & meta_flag) fio->op_flags |= REQ_META; if ((1 << fio->temp) & fua_flag) @@ -543,7 +549,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) if (!io->bio) return; - __attach_data_io_flag(fio); + __attach_io_flag(fio); bio_set_op_attrs(io->bio, fio->op, fio->op_flags); if (is_read_io(fio->op)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7794e2f0d6e8..c812fb8e2d9c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1568,6 +1568,7 @@ struct f2fs_sb_info { /* to attach REQ_META|REQ_FUA flags */ unsigned int data_io_flag; + unsigned int node_io_flag; /* For sysfs suppport */ struct kobject s_kobj; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index a117ae1f9d5f..fc4a46b68904 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -554,6 +554,7 @@ F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag); F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(free_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); @@ -635,6 +636,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_type), #endif ATTR_LIST(data_io_flag), + ATTR_LIST(node_io_flag), ATTR_LIST(dirty_segments), ATTR_LIST(free_segments), ATTR_LIST(unusable), From b7b911d59dacb47511a1e604bbfa901beb108305 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 4 Jun 2020 16:45:30 -0700 Subject: [PATCH 51/51] f2fs: attach IO flags to the missing cases This adds more IOs to attach flags. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 798c325a820d..267b5e76a02b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -691,6 +691,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) if (fio->io_wbc && !is_read_io(fio->op)) wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE); + __attach_io_flag(fio); bio_set_op_attrs(bio, fio->op, fio->op_flags); inc_page_count(fio->sbi, is_read_io(fio->op) ? 
@@ -877,6 +878,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) alloc_new: if (!bio) { bio = __bio_alloc(fio, BIO_MAX_PAGES); + __attach_io_flag(fio); bio_set_op_attrs(bio, fio->op, fio->op_flags); add_bio_entry(fio->sbi, bio, page, fio->temp);