From 1c4bf763039ccdc6dab9e1c3cf84a1cb14c6ff13 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 1 Jun 2016 20:55:51 -0700 Subject: [PATCH 01/68] Revert "f2fs: no need inc dirty pages under inode lock" This reverts commit b951a4ec165af4973b2bd9c80fb5845fbd840435. Conflicts: fs/f2fs/checkpoint.c --- fs/f2fs/checkpoint.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 389160049993..5cf7238f4f89 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -793,13 +793,12 @@ void update_dirty_page(struct inode *inode, struct page *page) !S_ISLNK(inode->i_mode)) return; - if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH)) { - spin_lock(&sbi->inode_lock[type]); + spin_lock(&sbi->inode_lock[type]); + if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH)) __add_dirty_inode(inode, type); - spin_unlock(&sbi->inode_lock[type]); - } - inode_inc_dirty_pages(inode); + spin_unlock(&sbi->inode_lock[type]); + SetPagePrivate(page); f2fs_trace_pid(page); } From 91942321e4c9f8460f260cdfcf0a7a48a73a84a4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 10:13:22 -0700 Subject: [PATCH 02/68] f2fs: use inode pointer for {set, clear}_inode_flag This patch refactors to use inode pointer for set_inode_flag and clear_inode_flag. Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 7 ++-- fs/f2fs/checkpoint.c | 15 ++++----- fs/f2fs/data.c | 21 ++++++------ fs/f2fs/dir.c | 22 ++++++------- fs/f2fs/extent_cache.c | 6 ++-- fs/f2fs/f2fs.h | 74 ++++++++++++++++++++---------------------- fs/f2fs/file.c | 58 +++++++++++++++------------------ fs/f2fs/gc.c | 4 +-- fs/f2fs/inline.c | 25 +++++++------- fs/f2fs/inode.c | 29 ++++++++--------- fs/f2fs/namei.c | 22 ++++++++----- fs/f2fs/node.c | 4 +-- fs/f2fs/segment.c | 2 +- fs/f2fs/segment.h | 2 +- fs/f2fs/super.c | 7 +--- fs/f2fs/xattr.c | 7 ++-- 16 files changed, 145 insertions(+), 160 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index a31c7e859af6..1b2c20228300 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -201,7 +201,6 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) static int __f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { - struct f2fs_inode_info *fi = F2FS_I(inode); int name_index; void *value = NULL; size_t size = 0; @@ -214,7 +213,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) return error; - set_acl_inode(fi, inode->i_mode); + set_acl_inode(inode, inode->i_mode); if (error == 0) acl = NULL; } @@ -233,7 +232,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (acl) { value = f2fs_acl_to_disk(acl, &size); if (IS_ERR(value)) { - clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(inode, FI_ACL_MODE); return (int)PTR_ERR(value); } } @@ -244,7 +243,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (!error) set_cached_acl(inode, type, acl); - clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(inode, FI_ACL_MODE); return error; } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5cf7238f4f89..aa8cb4559ff8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -759,28 +759,25 @@ fail_no_cp: static void __add_dirty_inode(struct inode *inode, enum inode_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_inode_info *fi = F2FS_I(inode); int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; - if (is_inode_flag_set(fi, flag)) + if (is_inode_flag_set(inode, flag)) return; - set_inode_flag(fi, flag); - list_add_tail(&fi->dirty_list, &sbi->inode_list[type]); + set_inode_flag(inode, flag); + list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]); stat_inc_dirty_inode(sbi, type); } static void __remove_dirty_inode(struct inode *inode, enum inode_type type) { - struct f2fs_inode_info *fi = F2FS_I(inode); int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; - if (get_dirty_pages(inode) || - !is_inode_flag_set(F2FS_I(inode), flag)) + if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag)) return; - list_del_init(&fi->dirty_list); - clear_inode_flag(fi, flag); + list_del_init(&F2FS_I(inode)->dirty_list); + clear_inode_flag(inode, flag); stat_dec_dirty_inode(F2FS_I_SB(inode), type); } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9a8bbc1fb1fa..dd46ca8e0f83 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -321,7 +321,7 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (!count) return 0; - if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) + if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) return -ENOSPC; @@ -566,7 +566,7 @@ got_it: ((loff_t)(index + 1) << PAGE_SHIFT)) { i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); /* Only the directory inode sets new_i_size */ - set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); + set_inode_flag(inode, FI_UPDATE_DIR); } return page; } @@ -580,7 +580,7 @@ static int __allocate_data_block(struct dnode_of_data *dn) pgoff_t fofs; blkcnt_t count = 1; - if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) + if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); @@ -717,8 +717,7 @@ next_block: } else { err = __allocate_data_block(&dn); if (!err) { - set_inode_flag(F2FS_I(inode), - FI_APPEND_WRITE); + set_inode_flag(inode, FI_APPEND_WRITE); allocated = true; } } @@ -1193,14 +1192,14 @@ retry_encrypt: !IS_ATOMIC_WRITTEN_PAGE(page) && need_inplace_update(inode))) { rewrite_data_page(fio); - set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); + set_inode_flag(inode, FI_UPDATE_WRITE); trace_f2fs_do_write_data_page(page, IPU); } else { write_data_page(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); - set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + set_inode_flag(inode, FI_APPEND_WRITE); if (page->index == 0) - set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); } out_writepage: f2fs_put_dnode(&dn); @@ -1469,7 +1468,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, goto skip_write; /* skip writing during file defragment */ - if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG)) + if (is_inode_flag_set(inode, FI_DO_DEFRAG)) goto skip_write; /* during POR, we don't need to trigger writepage at all. */ @@ -1549,7 +1548,7 @@ restart: if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA) { read_inline_data(page, ipage); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) set_inline_node(ipage); } else { @@ -1756,7 +1755,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); if (iov_iter_rw(iter) == WRITE) { if (err > 0) - set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); + set_inode_flag(inode, FI_UPDATE_WRITE); else if (err < 0) f2fs_write_failed(mapping, offset + count); } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index f9313f684540..9fa8d3edb8e0 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -385,7 +385,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, struct page *page; int err; - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + if (is_inode_flag_set(inode, FI_NEW_INODE)) { page = new_inode_page(inode); if (IS_ERR(page)) return page; @@ -429,7 +429,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, * This file should be checkpointed during fsync. * We lost i_pino from now on. */ - if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { + if (is_inode_flag_set(inode, FI_INC_LINK)) { file_lost_pino(inode); /* * If link the tmpfile to alias through linkat path, @@ -454,23 +454,23 @@ put_error: void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { - if (inode && is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + if (inode && is_inode_flag_set(inode, FI_NEW_INODE)) { if (S_ISDIR(inode->i_mode)) { inc_nlink(dir); - set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + set_inode_flag(dir, FI_UPDATE_DIR); } - clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); + clear_inode_flag(inode, FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); if (F2FS_I(dir)->i_current_depth != current_depth) { F2FS_I(dir)->i_current_depth = current_depth; - set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + set_inode_flag(dir, FI_UPDATE_DIR); } - if (inode && is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) - clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + if (inode && is_inode_flag_set(inode, FI_INC_LINK)) + clear_inode_flag(inode, FI_INC_LINK); } int room_for_filename(const void *bitmap, int slots, int max_slots) @@ -607,9 +607,9 @@ fail: if (inode) up_write(&F2FS_I(inode)->i_sem); - if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { + if (is_inode_flag_set(dir, FI_UPDATE_DIR)) { update_inode_page(dir); - clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + clear_inode_flag(dir, FI_UPDATE_DIR); } kunmap(dentry_page); f2fs_put_page(dentry_page, 1); @@ -661,7 +661,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) update_inode(inode, page); f2fs_put_page(page, 1); - clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); + clear_inode_flag(inode, FI_NEW_INODE); fail: up_write(&F2FS_I(inode)->i_sem); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 5bfcdb9b69f2..852a0b6f0600 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -431,7 +431,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, write_lock(&et->lock); - if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) { + if (is_inode_flag_set(inode, FI_NO_EXTENT)) { write_unlock(&et->lock); return false; } @@ -523,11 +523,11 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, prev.len < F2FS_MIN_EXTENT_LEN && et->largest.len < F2FS_MIN_EXTENT_LEN) { et->largest.len = 0; - set_inode_flag(F2FS_I(inode), FI_NO_EXTENT); + set_inode_flag(inode, FI_NO_EXTENT); } } - if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) + if (is_inode_flag_set(inode, FI_NO_EXTENT)) __free_extent_tree(sbi, et); write_unlock(&et->lock); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 916e7c238e3d..937106354eae 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1533,64 +1533,62 @@ enum { FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ }; -static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) +static inline void set_inode_flag(struct inode *inode, int flag) { - if (!test_bit(flag, &fi->flags)) - set_bit(flag, &fi->flags); + if (!test_bit(flag, &F2FS_I(inode)->flags)) + set_bit(flag, &F2FS_I(inode)->flags); } -static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) +static inline int is_inode_flag_set(struct inode *inode, int flag) { - return test_bit(flag, &fi->flags); + return test_bit(flag, &F2FS_I(inode)->flags); } -static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) +static inline void clear_inode_flag(struct inode *inode, int flag) { - if (test_bit(flag, &fi->flags)) - clear_bit(flag, &fi->flags); + if (test_bit(flag, &F2FS_I(inode)->flags)) + clear_bit(flag, &F2FS_I(inode)->flags); } -static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) +static inline void set_acl_inode(struct inode *inode, umode_t mode) { - fi->i_acl_mode = mode; - set_inode_flag(fi, FI_ACL_MODE); + F2FS_I(inode)->i_acl_mode = mode; + set_inode_flag(inode, FI_ACL_MODE); } -static inline void get_inline_info(struct f2fs_inode_info *fi, - struct f2fs_inode *ri) +static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) { if (ri->i_inline & F2FS_INLINE_XATTR) - set_inode_flag(fi, FI_INLINE_XATTR); + set_inode_flag(inode, FI_INLINE_XATTR); if (ri->i_inline & F2FS_INLINE_DATA) - set_inode_flag(fi, FI_INLINE_DATA); + set_inode_flag(inode, FI_INLINE_DATA); if (ri->i_inline & F2FS_INLINE_DENTRY) - set_inode_flag(fi, FI_INLINE_DENTRY); + set_inode_flag(inode, FI_INLINE_DENTRY); if (ri->i_inline & F2FS_DATA_EXIST) - set_inode_flag(fi, FI_DATA_EXIST); + set_inode_flag(inode, FI_DATA_EXIST); if (ri->i_inline & F2FS_INLINE_DOTS) - set_inode_flag(fi, FI_INLINE_DOTS); + set_inode_flag(inode, FI_INLINE_DOTS); } -static inline void set_raw_inline(struct f2fs_inode_info *fi, - struct f2fs_inode *ri) +static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) { ri->i_inline = 0; - if (is_inode_flag_set(fi, FI_INLINE_XATTR)) + if (is_inode_flag_set(inode, FI_INLINE_XATTR)) ri->i_inline |= F2FS_INLINE_XATTR; - if (is_inode_flag_set(fi, FI_INLINE_DATA)) + if (is_inode_flag_set(inode, FI_INLINE_DATA)) ri->i_inline |= F2FS_INLINE_DATA; - if (is_inode_flag_set(fi, FI_INLINE_DENTRY)) + if (is_inode_flag_set(inode, FI_INLINE_DENTRY)) ri->i_inline |= F2FS_INLINE_DENTRY; - if (is_inode_flag_set(fi, FI_DATA_EXIST)) + if (is_inode_flag_set(inode, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; - if (is_inode_flag_set(fi, FI_INLINE_DOTS)) + if (is_inode_flag_set(inode, FI_INLINE_DOTS)) ri->i_inline |= F2FS_INLINE_DOTS; } static inline int f2fs_has_inline_xattr(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR); + return is_inode_flag_set(inode, FI_INLINE_XATTR); } static inline unsigned int addrs_per_inode(struct inode *inode) @@ -1617,43 +1615,43 @@ static inline int inline_xattr_size(struct inode *inode) static inline int f2fs_has_inline_data(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); + return is_inode_flag_set(inode, FI_INLINE_DATA); } static inline void f2fs_clear_inline_inode(struct inode *inode) { - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - clear_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + clear_inode_flag(inode, FI_INLINE_DATA); + clear_inode_flag(inode, FI_DATA_EXIST); } static inline int f2fs_exist_data(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); + return is_inode_flag_set(inode, FI_DATA_EXIST); } static inline int f2fs_has_inline_dots(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DOTS); + return is_inode_flag_set(inode, FI_INLINE_DOTS); } static inline bool f2fs_is_atomic_file(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); + return is_inode_flag_set(inode, FI_ATOMIC_FILE); } static inline bool f2fs_is_volatile_file(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); + return is_inode_flag_set(inode, FI_VOLATILE_FILE); } static inline bool f2fs_is_first_block_written(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + return is_inode_flag_set(inode, FI_FIRST_BLOCK_WRITTEN); } static inline bool f2fs_is_drop_cache(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE); + return is_inode_flag_set(inode, FI_DROP_CACHE); } static inline void *inline_data_addr(struct page *page) @@ -1664,7 +1662,7 @@ static inline void *inline_data_addr(struct page *page) static inline int f2fs_has_inline_dentry(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); + return is_inode_flag_set(inode, FI_INLINE_DENTRY); } static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) @@ -1712,7 +1710,7 @@ static inline bool is_dot_dotdot(const struct qstr *str) static inline bool f2fs_may_extent_tree(struct inode *inode) { if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) || - is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) + is_inode_flag_set(inode, FI_NO_EXTENT)) return false; return S_ISREG(inode->i_mode); @@ -1748,7 +1746,7 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags) } #define get_inode_mode(i) \ - ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ + ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) /* get offset of first page in next direct node */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f4c0086655c4..e68e5adc23cd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -186,7 +186,6 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, int datasync, bool atomic) { struct inode *inode = file->f_mapping->host; - struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t ino = inode->i_ino; int ret = 0; @@ -204,9 +203,9 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, /* if fdatasync is triggered, let's do in-place-update */ if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) - set_inode_flag(fi, FI_NEED_IPU); + set_inode_flag(inode, FI_NEED_IPU); ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - clear_inode_flag(fi, FI_NEED_IPU); + clear_inode_flag(inode, FI_NEED_IPU); if (ret) { trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); @@ -222,14 +221,14 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, /* * if there is no written data, don't waste time to write recovery info. */ - if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && + if (!is_inode_flag_set(inode, FI_APPEND_WRITE) && !exist_written_data(sbi, ino, APPEND_INO)) { /* it may call write_inode just prior to fsync */ if (need_inode_page_update(sbi, ino)) goto go_write; - if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || + if (is_inode_flag_set(inode, FI_UPDATE_WRITE) || exist_written_data(sbi, ino, UPDATE_INO)) goto flush_out; goto out; @@ -239,9 +238,9 @@ go_write: * Both of fdatasync() and fsync() are able to be recovered from * sudden-power-off. */ - down_read(&fi->i_sem); + down_read(&F2FS_I(inode)->i_sem); need_cp = need_do_checkpoint(inode); - up_read(&fi->i_sem); + up_read(&F2FS_I(inode)->i_sem); if (need_cp) { /* all the dirty node pages should be flushed for POR */ @@ -252,8 +251,8 @@ go_write: * will be used only for fsynced inodes after checkpoint. */ try_to_fix_pino(inode); - clear_inode_flag(fi, FI_APPEND_WRITE); - clear_inode_flag(fi, FI_UPDATE_WRITE); + clear_inode_flag(inode, FI_APPEND_WRITE); + clear_inode_flag(inode, FI_UPDATE_WRITE); goto out; } sync_nodes: @@ -279,10 +278,10 @@ sync_nodes: /* once recovery info is written, don't need to tack this */ remove_ino_entry(sbi, ino, APPEND_INO); - clear_inode_flag(fi, FI_APPEND_WRITE); + clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: remove_ino_entry(sbi, ino, UPDATE_INO); - clear_inode_flag(fi, FI_UPDATE_WRITE); + clear_inode_flag(inode, FI_UPDATE_WRITE); ret = f2fs_issue_flush(sbi); f2fs_update_time(sbi, REQ_TIME); out: @@ -487,8 +486,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) set_data_blkaddr(dn); invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) - clear_inode_flag(F2FS_I(dn->inode), - FI_FIRST_BLOCK_WRITTEN); + clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); nr_free++; } @@ -654,7 +652,6 @@ int f2fs_getattr(struct vfsmount *mnt, #ifdef CONFIG_F2FS_FS_POSIX_ACL static void __setattr_copy(struct inode *inode, const struct iattr *attr) { - struct f2fs_inode_info *fi = F2FS_I(inode); unsigned int ia_valid = attr->ia_valid; if (ia_valid & ATTR_UID) @@ -675,7 +672,7 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr) if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) mode &= ~S_ISGID; - set_acl_inode(fi, mode); + set_acl_inode(inode, mode); } } #else @@ -685,7 +682,6 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr) int f2fs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); - struct f2fs_inode_info *fi = F2FS_I(inode); int err; err = inode_change_ok(inode, attr); @@ -724,9 +720,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_MODE) { err = posix_acl_chmod(inode, get_inode_mode(inode)); - if (err || is_inode_flag_set(fi, FI_ACL_MODE)) { - inode->i_mode = fi->i_acl_mode; - clear_inode_flag(fi, FI_ACL_MODE); + if (err || is_inode_flag_set(inode, FI_ACL_MODE)) { + inode->i_mode = F2FS_I(inode)->i_acl_mode; + clear_inode_flag(inode, FI_ACL_MODE); } } @@ -1310,10 +1306,10 @@ static int f2fs_release_file(struct inode *inode, struct file *filp) if (f2fs_is_atomic_file(inode)) drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { - clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - set_inode_flag(F2FS_I(inode), FI_DROP_CACHE); + clear_inode_flag(inode, FI_VOLATILE_FILE); + set_inode_flag(inode, FI_DROP_CACHE); filemap_fdatawrite(inode->i_mapping); - clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE); + clear_inode_flag(inode, FI_DROP_CACHE); } return 0; } @@ -1412,7 +1408,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) if (ret) goto out; - set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + set_inode_flag(inode, FI_ATOMIC_FILE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); if (!get_dirty_pages(inode)) @@ -1423,7 +1419,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_ATOMIC_FILE); out: inode_unlock(inode); mnt_drop_write_file(filp); @@ -1448,10 +1444,10 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) goto err_out; if (f2fs_is_atomic_file(inode)) { - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_ATOMIC_FILE); ret = commit_inmem_pages(inode); if (ret) { - set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + set_inode_flag(inode, FI_ATOMIC_FILE); goto err_out; } } @@ -1484,7 +1480,7 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) if (ret) goto out; - set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + set_inode_flag(inode, FI_VOLATILE_FILE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); out: inode_unlock(inode); @@ -1538,7 +1534,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) if (f2fs_is_atomic_file(inode)) drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { - clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + clear_inode_flag(inode, FI_VOLATILE_FILE); ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } @@ -1871,7 +1867,7 @@ do_map: continue; } - set_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); + set_inode_flag(inode, FI_DO_DEFRAG); idx = map.m_lblk; while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { @@ -1896,14 +1892,14 @@ do_map: if (idx < pg_end && cnt < blk_per_seg) goto do_map; - clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); + clear_inode_flag(inode, FI_DO_DEFRAG); err = filemap_fdatawrite(inode->i_mapping); if (err) goto out; } clear_out: - clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); + clear_inode_flag(inode, FI_DO_DEFRAG); out: inode_unlock(inode); if (!err) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 38d56f678912..4a03076074af 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -617,9 +617,9 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) f2fs_submit_page_mbio(&fio); f2fs_update_data_blkaddr(&dn, newaddr); - set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + set_inode_flag(inode, FI_APPEND_WRITE); if (page->index == 0) - set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); put_page_out: f2fs_put_page(fio.encrypted_page, 1); recover_block: diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index a4bb155dd00a..c50dee937c1b 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -138,7 +138,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) inode_dec_dirty_pages(dn->inode); /* this converted inline_data should be recovered. */ - set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); + set_inode_flag(dn->inode, FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ truncate_inline_inode(dn->inode_page, 0); @@ -213,8 +213,8 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) memcpy(dst_addr, src_addr, MAX_INLINE_DATA); kunmap_atomic(src_addr); - set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + set_inode_flag(inode, FI_APPEND_WRITE); + set_inode_flag(inode, FI_DATA_EXIST); sync_inode_page(&dn); clear_inline_node(dn.inode_page); @@ -252,8 +252,8 @@ process_inline: dst_addr = inline_data_addr(ipage); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + set_inode_flag(inode, FI_INLINE_DATA); + set_inode_flag(inode, FI_DATA_EXIST); update_inode(inode, ipage); f2fs_put_page(ipage, 1); @@ -341,7 +341,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, /* update i_size to MAX_INLINE_DATA */ if (i_size_read(inode) < MAX_INLINE_DATA) { i_size_write(inode, MAX_INLINE_DATA); - set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); + set_inode_flag(inode, FI_UPDATE_DIR); } return 0; } @@ -398,12 +398,12 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, truncate_inline_inode(ipage, 0); stat_dec_inline_dir(dir); - clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); + clear_inode_flag(dir, FI_INLINE_DENTRY); F2FS_I(dir)->i_current_depth = 1; if (i_size_read(dir) < PAGE_SIZE) { i_size_write(dir, PAGE_SIZE); - set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + set_inode_flag(dir, FI_UPDATE_DIR); } sync_inode_page(&dn); @@ -464,7 +464,6 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, struct f2fs_inline_dentry *inline_dentry) { struct f2fs_inline_dentry *backup_dentry; - struct f2fs_inode_info *fi = F2FS_I(dir); int err; backup_dentry = f2fs_kmalloc(sizeof(struct f2fs_inline_dentry), @@ -486,14 +485,14 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, lock_page(ipage); stat_dec_inline_dir(dir); - clear_inode_flag(fi, FI_INLINE_DENTRY); + clear_inode_flag(dir, FI_INLINE_DENTRY); update_inode(dir, ipage); kfree(backup_dentry); return 0; recover: lock_page(ipage); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA); - fi->i_current_depth = 0; + F2FS_I(dir)->i_current_depth = 0; i_size_write(dir, MAX_INLINE_DATA); update_inode(dir, ipage); f2fs_put_page(ipage, 1); @@ -569,9 +568,9 @@ fail: if (inode) up_write(&F2FS_I(inode)->i_sem); - if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { + if (is_inode_flag_set(dir, FI_UPDATE_DIR)) { update_inode(dir, ipage); - clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + clear_inode_flag(dir, FI_UPDATE_DIR); } out: f2fs_put_page(ipage, 1); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2e68adab0d64..62d8c9052f9b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -85,8 +85,8 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage) if (*start++) { f2fs_wait_on_page_writeback(ipage, NODE, true); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); - set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage)); + set_inode_flag(inode, FI_DATA_EXIST); + set_raw_inline(inode, F2FS_INODE(ipage)); set_page_dirty(ipage); return; } @@ -141,7 +141,7 @@ static int do_read_inode(struct inode *inode) if (f2fs_init_extent_tree(inode, &ri->i_ext)) set_page_dirty(node_page); - get_inline_info(fi, ri); + get_inline_info(inode, ri); /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) @@ -151,7 +151,7 @@ static int do_read_inode(struct inode *inode) __get_inode_rdev(inode, ri); if (__written_first_block(ri)) - set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); f2fs_put_page(node_page, 1); @@ -244,7 +244,7 @@ int update_inode(struct inode *inode, struct page *node_page) &ri->i_ext); else memset(&ri->i_ext, 0, sizeof(ri->i_ext)); - set_raw_inline(F2FS_I(inode), ri); + set_raw_inline(inode, ri); ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); @@ -261,7 +261,7 @@ int update_inode(struct inode *inode, struct page *node_page) __set_inode_rdev(inode, ri); set_cold_node(inode, node_page); - clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); + clear_inode_flag(inode, FI_DIRTY_INODE); /* deleted inode */ if (inode->i_nlink == 0) @@ -300,7 +300,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) inode->i_ino == F2FS_META_INO(sbi)) return 0; - if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE)) + if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; /* @@ -318,8 +318,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) void f2fs_evict_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_inode_info *fi = F2FS_I(inode); - nid_t xnid = fi->i_xattr_nid; + nid_t xnid = F2FS_I(inode)->i_xattr_nid; int err = 0; /* some remained atomic pages should discarded */ @@ -342,7 +341,7 @@ void f2fs_evict_inode(struct inode *inode) goto no_delete; sb_start_intwrite(inode->i_sb); - set_inode_flag(fi, FI_NO_ALLOC); + set_inode_flag(inode, FI_NO_ALLOC); i_size_write(inode, 0); retry: if (F2FS_HAS_BLOCKS(inode)) @@ -369,13 +368,13 @@ no_delete: invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); - if (is_inode_flag_set(fi, FI_APPEND_WRITE)) + if (is_inode_flag_set(inode, FI_APPEND_WRITE)) add_ino_entry(sbi, inode->i_ino, APPEND_INO); - if (is_inode_flag_set(fi, FI_UPDATE_WRITE)) + if (is_inode_flag_set(inode, FI_UPDATE_WRITE)) add_ino_entry(sbi, inode->i_ino, UPDATE_INO); - if (is_inode_flag_set(fi, FI_FREE_NID)) { + if (is_inode_flag_set(inode, FI_FREE_NID)) { alloc_nid_failed(sbi, inode->i_ino); - clear_inode_flag(fi, FI_FREE_NID); + clear_inode_flag(inode, FI_FREE_NID); } f2fs_bug_on(sbi, err && !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); @@ -411,7 +410,7 @@ void handle_failed_inode(struct inode *inode) } alloc_nid_done(sbi, inode->i_ino); } else { - set_inode_flag(F2FS_I(inode), FI_FREE_NID); + set_inode_flag(inode, FI_FREE_NID); } f2fs_unlock_op(sbi); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 324ed3812f30..cbf61a5f0e57 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -60,10 +60,14 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); + set_inode_flag(inode, FI_NEW_INODE); + + if (test_opt(sbi, INLINE_XATTR)) + set_inode_flag(inode, FI_INLINE_XATTR); if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + set_inode_flag(inode, FI_INLINE_DATA); if (f2fs_may_inline_dentry(inode)) - set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); + set_inode_flag(inode, FI_INLINE_DENTRY); f2fs_init_extent_tree(inode, NULL); @@ -79,7 +83,7 @@ fail: trace_f2fs_new_inode(inode, err); make_bad_inode(inode); if (nid_free) - set_inode_flag(F2FS_I(inode), FI_FREE_NID); + set_inode_flag(inode, FI_FREE_NID); iput(inode); return ERR_PTR(err); } @@ -177,7 +181,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, inode->i_ctime = CURRENT_TIME; ihold(inode); - set_inode_flag(F2FS_I(inode), FI_INC_LINK); + set_inode_flag(inode, FI_INC_LINK); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -190,7 +194,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, f2fs_sync_fs(sbi->sb, 1); return 0; out: - clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + clear_inode_flag(inode, FI_INC_LINK); iput(inode); f2fs_unlock_op(sbi); return err; @@ -244,7 +248,7 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) } out: if (!err) { - clear_inode_flag(F2FS_I(dir), FI_INLINE_DOTS); + clear_inode_flag(dir, FI_INLINE_DOTS); mark_inode_dirty(dir); } @@ -492,7 +496,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) f2fs_balance_fs(sbi, true); - set_inode_flag(F2FS_I(inode), FI_INC_LINK); + set_inode_flag(inode, FI_INC_LINK); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -509,7 +513,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return 0; out_fail: - clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + clear_inode_flag(inode, FI_INC_LINK); handle_failed_inode(inode); return err; } @@ -763,7 +767,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (whiteout) { whiteout->i_state |= I_LINKABLE; - set_inode_flag(F2FS_I(whiteout), FI_INC_LINK); + set_inode_flag(whiteout, FI_INC_LINK); err = f2fs_add_link(old_dentry, whiteout); if (err) goto put_out_dir; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1f21aae80c40..8001020f7762 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1019,7 +1019,7 @@ struct page *new_node_page(struct dnode_of_data *dn, struct page *page; int err; - if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) + if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return ERR_PTR(-EPERM); page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false); @@ -1955,7 +1955,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page) ri = F2FS_INODE(page); if (!(ri->i_inline & F2FS_INLINE_XATTR)) { - clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR); + clear_inode_flag(inode, FI_INLINE_XATTR); goto update_inode; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2e6f537a0e7d..77dc929715cf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -241,7 +241,7 @@ void drop_inmem_pages(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_ATOMIC_FILE); mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7a756ff5a36d..fcdd7310b961 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -544,7 +544,7 @@ static inline bool need_inplace_update(struct inode *inode) /* this is only set during fdatasync */ if (policy & (0x1 << F2FS_IPU_FSYNC) && - is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) + is_inode_flag_set(inode, FI_NEED_IPU)) return true; return false; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 74cc8520b8b1..160b15b28b95 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -540,11 +540,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); - set_inode_flag(fi, FI_NEW_INODE); - - if (test_opt(F2FS_SB(sb), INLINE_XATTR)) - set_inode_flag(fi, FI_INLINE_XATTR); - /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; return &fi->vfs_inode; @@ -596,7 +591,7 @@ static int f2fs_drop_inode(struct inode *inode) */ static void f2fs_dirty_inode(struct inode *inode, int flags) { - set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); + set_inode_flag(inode, FI_DIRTY_INODE); } static void f2fs_i_callback(struct rcu_head *head) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index e3decae3acfb..ca12d4b051f7 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -441,7 +441,6 @@ static int __f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, struct page *ipage, int flags) { - struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_xattr_entry *here, *last; void *base_addr; int found, newsize; @@ -539,10 +538,10 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (error) goto exit; - if (is_inode_flag_set(fi, FI_ACL_MODE)) { - inode->i_mode = fi->i_acl_mode; + if (is_inode_flag_set(inode, FI_ACL_MODE)) { + inode->i_mode = F2FS_I(inode)->i_acl_mode; inode->i_ctime = CURRENT_TIME; - clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(inode, FI_ACL_MODE); } if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) From fc9581c809722960c46a02445f2434120e5e483b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 09:22:03 -0700 Subject: [PATCH 03/68] f2fs: introduce f2fs_i_size_write with mark_inode_dirty_sync This patch introduces f2fs_i_size_write() to call mark_inode_dirty_sync() with i_size_write(). Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 ++++------ fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 9 +++++++++ fs/f2fs/file.c | 10 ++++------ fs/f2fs/inline.c | 6 +++--- fs/f2fs/recovery.c | 2 +- fs/f2fs/super.c | 2 +- 7 files changed, 23 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index dd46ca8e0f83..0dd42b6ce6f3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -564,7 +564,7 @@ struct page *get_new_data_page(struct inode *inode, got_it: if (new_i_size && i_size_read(inode) < ((loff_t)(index + 1) << PAGE_SHIFT)) { - i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); + f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); /* Only the directory inode sets new_i_size */ set_inode_flag(inode, FI_UPDATE_DIR); } @@ -605,7 +605,7 @@ alloc: fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + dn->ofs_in_node; if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT)) - i_size_write(dn->inode, + f2fs_i_size_write(dn->inode, ((loff_t)(fofs + 1) << PAGE_SHIFT)); return 0; } @@ -1711,10 +1711,8 @@ static int f2fs_write_end(struct file *file, set_page_dirty(page); - if (pos + copied > i_size_read(inode)) { - i_size_write(inode, pos + copied); - mark_inode_dirty(inode); - } + if (pos + copied > i_size_read(inode)) + f2fs_i_size_write(inode, pos + copied); f2fs_put_page(page, 1); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9fa8d3edb8e0..dc4beae14c92 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -686,7 +686,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) drop_nlink(inode); if (S_ISDIR(inode->i_mode)) { drop_nlink(inode); - i_size_write(inode, 0); + f2fs_i_size_write(inode, 0); } up_write(&F2FS_I(inode)->i_sem); update_inode_page(inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 937106354eae..bdd6b875f50d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1556,6 +1556,15 @@ static inline void set_acl_inode(struct inode *inode, umode_t mode) set_inode_flag(inode, FI_ACL_MODE); } +static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) +{ + if (i_size_read(inode) == i_size) + return; + + i_size_write(inode, i_size); + mark_inode_dirty_sync(inode); +} + static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) { if (ri->i_inline & F2FS_INLINE_XATTR) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e68e5adc23cd..d0f42587309f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -988,7 +988,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) ret = truncate_blocks(inode, new_size, true); if (!ret) - i_size_write(inode, new_size); + f2fs_i_size_write(inode, new_size); return ret; } @@ -1125,8 +1125,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, out: if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) { - i_size_write(inode, new_size); - mark_inode_dirty(inode); + f2fs_i_size_write(inode, new_size); update_inode_page(inode); } @@ -1186,7 +1185,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) truncate_pagecache(inode, offset); if (!ret) - i_size_write(inode, new_size); + f2fs_i_size_write(inode, new_size); return ret; } @@ -1235,8 +1234,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, } if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) { - i_size_write(inode, new_size); - mark_inode_dirty(inode); + f2fs_i_size_write(inode, new_size); update_inode_page(inode); } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index c50dee937c1b..7a9bc442dd77 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -340,7 +340,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, /* update i_size to MAX_INLINE_DATA */ if (i_size_read(inode) < MAX_INLINE_DATA) { - i_size_write(inode, MAX_INLINE_DATA); + f2fs_i_size_write(inode, MAX_INLINE_DATA); set_inode_flag(inode, FI_UPDATE_DIR); } return 0; @@ -402,7 +402,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, F2FS_I(dir)->i_current_depth = 1; if (i_size_read(dir) < PAGE_SIZE) { - i_size_write(dir, PAGE_SIZE); + f2fs_i_size_write(dir, PAGE_SIZE); set_inode_flag(dir, FI_UPDATE_DIR); } @@ -493,7 +493,7 @@ recover: lock_page(ipage); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA); F2FS_I(dir)->i_current_depth = 0; - i_size_write(dir, MAX_INLINE_DATA); + f2fs_i_size_write(dir, MAX_INLINE_DATA); update_inode(dir, ipage); f2fs_put_page(ipage, 1); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 3d7216d7a288..2500b6a5daf0 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -175,7 +175,7 @@ static void recover_inode(struct inode *inode, struct page *page) char *name; inode->i_mode = le16_to_cpu(raw->i_mode); - i_size_write(inode, le64_to_cpu(raw->i_size)); + f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 160b15b28b95..d832bf4051b5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -568,7 +568,7 @@ static int f2fs_drop_inode(struct inode *inode) f2fs_destroy_extent_node(inode); sb_start_intwrite(inode->i_sb); - i_size_write(inode, 0); + f2fs_i_size_write(inode, 0); if (F2FS_HAS_BLOCKS(inode)) f2fs_truncate(inode, true); From 8edd03c870e4eb8d635d507a7d83fe35d76117c2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 09:26:06 -0700 Subject: [PATCH 04/68] f2fs: introduce f2fs_i_blocks_write with mark_inode_dirty_sync This patch introduces f2fs_i_blocks_write() to call mark_inode_dirty_sync() when changing inode->i_blocks. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 - fs/f2fs/f2fs.h | 17 +++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0dd42b6ce6f3..6a4c60c2fd3a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -344,7 +344,6 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (set_page_dirty(dn->node_page)) dn->node_changed = true; - mark_inode_dirty(dn->inode); sync_inode_page(dn); return 0; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bdd6b875f50d..9536e9e17b40 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1119,6 +1119,7 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) return ofs == XATTR_NODE_OFFSET; } +static inline void f2fs_i_blocks_write(struct inode *, blkcnt_t, bool); static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count) { @@ -1141,7 +1142,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, } } /* *count can be recalculated */ - inode->i_blocks += *count; + f2fs_i_blocks_write(inode, *count, true); sbi->total_valid_block_count = sbi->total_valid_block_count + (block_t)(*count); spin_unlock(&sbi->stat_lock); @@ -1157,7 +1158,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); f2fs_bug_on(sbi, inode->i_blocks < count); - inode->i_blocks -= count; + f2fs_i_blocks_write(inode, count, false); sbi->total_valid_block_count -= (block_t)count; spin_unlock(&sbi->stat_lock); } @@ -1294,7 +1295,7 @@ static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, } if (inode) - inode->i_blocks++; + f2fs_i_blocks_write(inode, 1, true); sbi->total_valid_node_count++; sbi->total_valid_block_count++; @@ -1313,7 +1314,7 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, !sbi->total_valid_node_count); f2fs_bug_on(sbi, !inode->i_blocks); - inode->i_blocks--; + f2fs_i_blocks_write(inode, 1, false); sbi->total_valid_node_count--; sbi->total_valid_block_count--; @@ -1556,6 +1557,14 @@ static inline void set_acl_inode(struct inode *inode, umode_t mode) set_inode_flag(inode, FI_ACL_MODE); } +static inline void f2fs_i_blocks_write(struct inode *inode, + blkcnt_t diff, bool add) +{ + inode->i_blocks = add ? inode->i_blocks + diff : + inode->i_blocks - diff; + mark_inode_dirty_sync(inode); +} + static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) { if (i_size_read(inode) == i_size) From a1961246c392dbf7dabdc81b4e074c32053fbf61 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 09:43:20 -0700 Subject: [PATCH 05/68] f2fs: introduce f2fs_i_links_write with mark_inode_dirty_sync This patch introduces f2fs_i_links_write() to call mark_inode_dirty_sync() when changing inode->i_links. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 1 + fs/f2fs/dir.c | 10 +++++----- fs/f2fs/f2fs.h | 9 +++++++++ fs/f2fs/namei.c | 30 +++++++++--------------------- 4 files changed, 24 insertions(+), 26 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index aa8cb4559ff8..02e0522beccf 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -535,6 +535,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) } clear_nlink(inode); + mark_inode_dirty_sync(inode); /* truncate all the data during iput */ iput(inode); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index dc4beae14c92..f8ca0f31271f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -437,7 +437,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, */ if (inode->i_nlink == 0) remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino); - inc_nlink(inode); + f2fs_i_links_write(inode, true); } return page; @@ -456,7 +456,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, { if (inode && is_inode_flag_set(inode, FI_NEW_INODE)) { if (S_ISDIR(inode->i_mode)) { - inc_nlink(dir); + f2fs_i_links_write(dir, true); set_inode_flag(dir, FI_UPDATE_DIR); } clear_inode_flag(inode, FI_NEW_INODE); @@ -675,7 +675,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) down_write(&F2FS_I(inode)->i_sem); if (S_ISDIR(inode->i_mode)) { - drop_nlink(dir); + f2fs_i_links_write(dir, false); if (page) update_inode(dir, page); else @@ -683,9 +683,9 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) } inode->i_ctime = CURRENT_TIME; - drop_nlink(inode); + f2fs_i_links_write(inode, false); if (S_ISDIR(inode->i_mode)) { - drop_nlink(inode); + f2fs_i_links_write(inode, false); f2fs_i_size_write(inode, 0); } up_write(&F2FS_I(inode)->i_sem); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9536e9e17b40..d98aaf3829a6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1557,6 +1557,15 @@ static inline void set_acl_inode(struct inode *inode, umode_t mode) set_inode_flag(inode, FI_ACL_MODE); } +static inline void f2fs_i_links_write(struct inode *inode, bool inc) +{ + if (inc) + inc_nlink(inode); + else + drop_nlink(inode); + mark_inode_dirty_sync(inode); +} + static inline void f2fs_i_blocks_write(struct inode *inode, blkcnt_t diff, bool add) { diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index cbf61a5f0e57..af7c75ab9343 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -349,9 +349,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) f2fs_delete_entry(de, page, dir, inode); f2fs_unlock_op(sbi); - /* In order to evict this inode, we set it dirty */ - mark_inode_dirty(inode); - if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); fail: @@ -597,16 +594,16 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, * remove all unused data of tmpfile after abnormal power-off. */ add_orphan_inode(sbi, inode->i_ino); - f2fs_unlock_op(sbi); - alloc_nid_done(sbi, inode->i_ino); if (whiteout) { - inode_dec_link_count(inode); + f2fs_i_links_write(inode, false); *whiteout = inode; } else { d_tmpfile(dentry, inode); } + /* link_count was changed by d_tmpfile as well. */ + f2fs_unlock_op(sbi); unlock_new_inode(inode); return 0; @@ -704,12 +701,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, new_inode->i_ctime = CURRENT_TIME; down_write(&F2FS_I(new_inode)->i_sem); if (old_dir_entry) - drop_nlink(new_inode); - drop_nlink(new_inode); + f2fs_i_links_write(new_inode, false); + f2fs_i_links_write(new_inode, false); up_write(&F2FS_I(new_inode)->i_sem); - mark_inode_dirty(new_inode); - if (!new_inode->i_nlink) add_orphan_inode(sbi, new_inode->i_ino); else @@ -729,7 +724,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } if (old_dir_entry) { - inc_nlink(new_dir); + f2fs_i_links_write(new_dir, true); update_inode_page(new_dir); } @@ -784,8 +779,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } - drop_nlink(old_dir); - mark_inode_dirty(old_dir); + f2fs_i_links_write(old_dir, false); update_inode_page(old_dir); } @@ -913,10 +907,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir->i_ctime = CURRENT_TIME; if (old_nlink) { down_write(&F2FS_I(old_dir)->i_sem); - if (old_nlink < 0) - drop_nlink(old_dir); - else - inc_nlink(old_dir); + f2fs_i_links_write(old_dir, old_nlink > 0); up_write(&F2FS_I(old_dir)->i_sem); } mark_inode_dirty(old_dir); @@ -934,10 +925,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, new_dir->i_ctime = CURRENT_TIME; if (new_nlink) { down_write(&F2FS_I(new_dir)->i_sem); - if (new_nlink < 0) - drop_nlink(new_dir); - else - inc_nlink(new_dir); + f2fs_i_links_write(new_dir, new_nlink > 0); up_write(&F2FS_I(new_dir)->i_sem); } mark_inode_dirty(new_dir); From 205b98221cdf72b1cbdedf55f93d193999616e6e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 09:52:20 -0700 Subject: [PATCH 06/68] f2fs: call mark_inode_dirty_sync for i_field changes This patch calls mark_inode_dirty_sync() for the following on-disk inode changes. -> largest -> ctime/mtime/atime -> i_current_depth -> i_xattr_nid -> i_pino -> i_advise -> i_flags -> i_mode Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 2 ++ fs/f2fs/dir.c | 14 +++++----- fs/f2fs/extent_cache.c | 24 +++++++++-------- fs/f2fs/f2fs.h | 58 ++++++++++++++++++++++++++++++++++++------ fs/f2fs/file.c | 12 ++++----- fs/f2fs/inline.c | 7 ++--- fs/f2fs/inode.c | 1 + fs/f2fs/namei.c | 11 +++----- fs/f2fs/node.c | 6 ++--- fs/f2fs/xattr.c | 3 ++- 10 files changed, 92 insertions(+), 46 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 1b2c20228300..6a414e75e705 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -384,6 +384,8 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, if (error) return error; + mark_inode_dirty_sync(inode); + if (default_acl) { error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, ipage); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index f8ca0f31271f..384d51cb77bf 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -243,8 +243,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, "Corrupted max_depth of %lu: %u", dir->i_ino, max_depth); max_depth = MAX_DIR_HASH_DEPTH; - F2FS_I(dir)->i_current_depth = max_depth; - mark_inode_dirty(dir); + f2fs_i_depth_write(dir, max_depth); } for (level = 0; level < max_depth; level++) { @@ -303,9 +302,9 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, set_de_type(de, inode->i_mode); f2fs_dentry_kunmap(dir, page); set_page_dirty(page); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - mark_inode_dirty(dir); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + mark_inode_dirty_sync(dir); f2fs_put_page(page, 1); } @@ -462,10 +461,10 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, clear_inode_flag(inode, FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = CURRENT_TIME; - mark_inode_dirty(dir); + mark_inode_dirty_sync(dir); if (F2FS_I(dir)->i_current_depth != current_depth) { - F2FS_I(dir)->i_current_depth = current_depth; + f2fs_i_depth_write(dir, current_depth); set_inode_flag(dir, FI_UPDATE_DIR); } @@ -597,7 +596,7 @@ add_dentry: if (inode) { /* we don't need to mark_inode_dirty now */ - F2FS_I(inode)->i_pino = dir->i_ino; + f2fs_i_pino_write(inode, dir->i_ino); update_inode(inode, page); f2fs_put_page(page, 1); } @@ -730,6 +729,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, set_page_dirty(page); dir->i_ctime = dir->i_mtime = CURRENT_TIME; + mark_inode_dirty_sync(dir); if (inode) f2fs_drop_nlink(dir, inode, NULL); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 852a0b6f0600..d21dda607bf2 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -170,8 +170,10 @@ static void __drop_largest_extent(struct inode *inode, { struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest; - if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) + if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) { largest->len = 0; + mark_inode_dirty_sync(inode); + } } /* return true, if inode page is changed */ @@ -335,11 +337,12 @@ lookup_neighbors: return en; } -static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, +static struct extent_node *__try_merge_extent_node(struct inode *inode, struct extent_tree *et, struct extent_info *ei, struct extent_node *prev_ex, struct extent_node *next_ex) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_node *en = NULL; if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) { @@ -360,7 +363,7 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, if (!en) return NULL; - __try_update_largest_extent(et, en); + __try_update_largest_extent(inode, et, en); spin_lock(&sbi->extent_lock); if (!list_empty(&en->list)) { @@ -371,11 +374,12 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, return en; } -static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, +static struct extent_node *__insert_extent_tree(struct inode *inode, struct extent_tree *et, struct extent_info *ei, struct rb_node **insert_p, struct rb_node *insert_parent) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct rb_node **p = &et->root.rb_node; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -402,7 +406,7 @@ do_insert: if (!en) return NULL; - __try_update_largest_extent(et, en); + __try_update_largest_extent(inode, et, en); /* update in global extent list */ spin_lock(&sbi->extent_lock); @@ -473,7 +477,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, set_extent_info(&ei, end, end - dei.fofs + dei.blk, org_end - end); - en1 = __insert_extent_tree(sbi, et, &ei, + en1 = __insert_extent_tree(inode, et, &ei, NULL, NULL); next_en = en1; } else { @@ -494,7 +498,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, } if (parts) - __try_update_largest_extent(et, en); + __try_update_largest_extent(inode, et, en); else __release_extent_node(sbi, et, en); @@ -514,15 +518,15 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, if (blkaddr) { set_extent_info(&ei, fofs, blkaddr, len); - if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) - __insert_extent_tree(sbi, et, &ei, + if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en)) + __insert_extent_tree(inode, et, &ei, insert_p, insert_parent); /* give up extent_cache, if split and small updates happen */ if (dei.len >= 1 && prev.len < F2FS_MIN_EXTENT_LEN && et->largest.len < F2FS_MIN_EXTENT_LEN) { - et->largest.len = 0; + __drop_largest_extent(inode, 0, UINT_MAX); set_inode_flag(inode, FI_NO_EXTENT); } } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d98aaf3829a6..0534d7a18b35 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -498,11 +498,13 @@ static inline bool __is_front_mergeable(struct extent_info *cur, return __is_extent_mergeable(cur, front); } -static inline void __try_update_largest_extent(struct extent_tree *et, - struct extent_node *en) +static inline void __try_update_largest_extent(struct inode *inode, + struct extent_tree *et, struct extent_node *en) { - if (en->ei.len > et->largest.len) + if (en->ei.len > et->largest.len) { et->largest = en->ei; + mark_inode_dirty_sync(inode); + } } struct f2fs_nm_info { @@ -1534,10 +1536,26 @@ enum { FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ }; +static inline void __mark_inode_dirty_flag(struct inode *inode, + int flag, bool set) +{ + switch (flag) { + case FI_INLINE_XATTR: + case FI_INLINE_DATA: + case FI_INLINE_DENTRY: + if (set) + return; + case FI_DATA_EXIST: + case FI_INLINE_DOTS: + mark_inode_dirty_sync(inode); + } +} + static inline void set_inode_flag(struct inode *inode, int flag) { if (!test_bit(flag, &F2FS_I(inode)->flags)) set_bit(flag, &F2FS_I(inode)->flags); + __mark_inode_dirty_flag(inode, flag, true); } static inline int is_inode_flag_set(struct inode *inode, int flag) @@ -1549,12 +1567,14 @@ static inline void clear_inode_flag(struct inode *inode, int flag) { if (test_bit(flag, &F2FS_I(inode)->flags)) clear_bit(flag, &F2FS_I(inode)->flags); + __mark_inode_dirty_flag(inode, flag, false); } static inline void set_acl_inode(struct inode *inode, umode_t mode) { F2FS_I(inode)->i_acl_mode = mode; set_inode_flag(inode, FI_ACL_MODE); + mark_inode_dirty_sync(inode); } static inline void f2fs_i_links_write(struct inode *inode, bool inc) @@ -1583,18 +1603,38 @@ static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) mark_inode_dirty_sync(inode); } +static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) +{ + F2FS_I(inode)->i_current_depth = depth; + mark_inode_dirty_sync(inode); +} + +static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid) +{ + F2FS_I(inode)->i_xattr_nid = xnid; + mark_inode_dirty_sync(inode); +} + +static inline void f2fs_i_pino_write(struct inode *inode, nid_t pino) +{ + F2FS_I(inode)->i_pino = pino; + mark_inode_dirty_sync(inode); +} + static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) { + struct f2fs_inode_info *fi = F2FS_I(inode); + if (ri->i_inline & F2FS_INLINE_XATTR) - set_inode_flag(inode, FI_INLINE_XATTR); + set_bit(FI_INLINE_XATTR, &fi->flags); if (ri->i_inline & F2FS_INLINE_DATA) - set_inode_flag(inode, FI_INLINE_DATA); + set_bit(FI_INLINE_DATA, &fi->flags); if (ri->i_inline & F2FS_INLINE_DENTRY) - set_inode_flag(inode, FI_INLINE_DENTRY); + set_bit(FI_INLINE_DENTRY, &fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) - set_inode_flag(inode, FI_DATA_EXIST); + set_bit(FI_DATA_EXIST, &fi->flags); if (ri->i_inline & F2FS_INLINE_DOTS) - set_inode_flag(inode, FI_INLINE_DOTS); + set_bit(FI_INLINE_DOTS, &fi->flags); } static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) @@ -1706,11 +1746,13 @@ static inline int is_file(struct inode *inode, int type) static inline void set_file(struct inode *inode, int type) { F2FS_I(inode)->i_advise |= type; + mark_inode_dirty_sync(inode); } static inline void clear_file(struct inode *inode, int type) { F2FS_I(inode)->i_advise &= ~type; + mark_inode_dirty_sync(inode); } static inline int f2fs_readonly(struct super_block *sb) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d0f42587309f..c5606b1e1a89 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -171,11 +171,10 @@ static void try_to_fix_pino(struct inode *inode) fi->xattr_ver = 0; if (file_wrong_pino(inode) && inode->i_nlink == 1 && get_parent_ino(inode, &pino)) { - fi->i_pino = pino; + f2fs_i_pino_write(inode, pino); file_got_pino(inode); up_write(&fi->i_sem); - mark_inode_dirty_sync(inode); f2fs_write_inode(inode, NULL); } else { up_write(&fi->i_sem); @@ -636,7 +635,7 @@ int f2fs_truncate(struct inode *inode, bool lock) return err; inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); + mark_inode_dirty_sync(inode); return 0; } @@ -726,7 +725,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } } - mark_inode_dirty(inode); + mark_inode_dirty_sync(inode); return err; } @@ -1279,7 +1278,7 @@ static long f2fs_fallocate(struct file *file, int mode, if (!ret) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); + mark_inode_dirty_sync(inode); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } @@ -1370,9 +1369,8 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) fi->i_flags = flags; inode_unlock(inode); - f2fs_set_inode_flags(inode); inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); + f2fs_set_inode_flags(inode); out: mnt_drop_write_file(filp); return ret; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 7a9bc442dd77..4bc025c29f82 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -400,7 +400,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); - F2FS_I(dir)->i_current_depth = 1; + f2fs_i_depth_write(dir, 1); if (i_size_read(dir) < PAGE_SIZE) { f2fs_i_size_write(dir, PAGE_SIZE); set_inode_flag(dir, FI_UPDATE_DIR); @@ -492,7 +492,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, recover: lock_page(ipage); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA); - F2FS_I(dir)->i_current_depth = 0; + f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA); update_inode(dir, ipage); f2fs_put_page(ipage, 1); @@ -558,7 +558,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, /* we don't need to mark_inode_dirty now */ if (inode) { - F2FS_I(inode)->i_pino = dir->i_ino; + f2fs_i_pino_write(inode, dir->i_ino); update_inode(inode, page); f2fs_put_page(page, 1); } @@ -597,6 +597,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, set_page_dirty(page); dir->i_ctime = dir->i_mtime = CURRENT_TIME; + mark_inode_dirty_sync(dir); if (inode) f2fs_drop_nlink(dir, inode, page); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 62d8c9052f9b..34aa0949e48c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -35,6 +35,7 @@ void f2fs_set_inode_flags(struct inode *inode) new_fl |= S_DIRSYNC; inode_set_flags(inode, new_fl, S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + mark_inode_dirty_sync(inode); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index af7c75ab9343..f2b2c4068648 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -76,7 +76,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) stat_inc_inline_dir(inode); trace_f2fs_new_inode(inode, 0); - mark_inode_dirty(inode); return inode; fail: @@ -247,10 +246,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); } out: - if (!err) { + if (!err) clear_inode_flag(dir, FI_INLINE_DOTS); - mark_inode_dirty(dir); - } f2fs_unlock_op(sbi); return err; @@ -756,7 +753,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(old_inode); + mark_inode_dirty_sync(old_inode); f2fs_delete_entry(old_entry, old_page, old_dir, NULL); @@ -910,7 +907,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_i_links_write(old_dir, old_nlink > 0); up_write(&F2FS_I(old_dir)->i_sem); } - mark_inode_dirty(old_dir); + mark_inode_dirty_sync(old_dir); update_inode_page(old_dir); /* update directory entry info of new dir inode */ @@ -928,7 +925,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_i_links_write(new_dir, new_nlink > 0); up_write(&F2FS_I(new_dir)->i_sem); } - mark_inode_dirty(new_dir); + mark_inode_dirty_sync(new_dir); update_inode_page(new_dir); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8001020f7762..0635304c50ac 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -953,7 +953,7 @@ int truncate_xattr_node(struct inode *inode, struct page *page) if (IS_ERR(npage)) return PTR_ERR(npage); - F2FS_I(inode)->i_xattr_nid = 0; + f2fs_i_xnid_write(inode, 0); /* need to do checkpoint during fsync */ F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); @@ -1047,7 +1047,7 @@ struct page *new_node_page(struct dnode_of_data *dn, dn->node_changed = true; if (f2fs_has_xattr_block(ofs)) - F2FS_I(dn->inode)->i_xattr_nid = dn->nid; + f2fs_i_xnid_write(dn->inode, dn->nid); dn->node_page = page; if (ipage) @@ -1997,7 +1997,7 @@ recover_xnid: get_node_info(sbi, new_xnid, &ni); ni.ino = inode->i_ino; set_node_addr(sbi, &ni, NEW_ADDR, false); - F2FS_I(inode)->i_xattr_nid = new_xnid; + f2fs_i_xnid_write(inode, new_xnid); /* 3: update xattr blkaddr */ refresh_sit_entry(sbi, NEW_ADDR, blkaddr); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index ca12d4b051f7..1fe6366a60c1 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -106,7 +106,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler, return -EINVAL; F2FS_I(inode)->i_advise |= *(char *)value; - mark_inode_dirty(inode); + mark_inode_dirty_sync(inode); return 0; } @@ -551,6 +551,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, update_inode(inode, ipage); else update_inode_page(inode); + mark_inode_dirty_sync(inode); exit: kzfree(base_addr); return error; From 0f18b462b2e5aff64b8638e8a47284b907351ef3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 11:10:10 -0700 Subject: [PATCH 07/68] f2fs: flush inode metadata when checkpoint is doing This patch registers all the inodes which have dirty metadata to sync when checkpoint is doing. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 36 +++++++++++++++++++++++++++++ fs/f2fs/debug.c | 5 ++-- fs/f2fs/f2fs.h | 9 ++++++-- fs/f2fs/inode.c | 7 +++++- fs/f2fs/node.c | 1 + fs/f2fs/segment.h | 2 ++ fs/f2fs/super.c | 54 ++++++++++++++++++++++++++++++++++++++++++-- 7 files changed, 107 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 02e0522beccf..5ddd15cd6c6c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -859,6 +859,34 @@ retry: goto retry; } +int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) +{ + struct list_head *head = &sbi->inode_list[DIRTY_META]; + struct inode *inode; + struct f2fs_inode_info *fi; + s64 total = get_pages(sbi, F2FS_DIRTY_IMETA); + + while (total--) { + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + + spin_lock(&sbi->inode_lock[DIRTY_META]); + if (list_empty(head)) { + spin_unlock(&sbi->inode_lock[DIRTY_META]); + return 0; + } + fi = list_entry(head->next, struct f2fs_inode_info, + gdirty_list); + inode = igrab(&fi->vfs_inode); + spin_unlock(&sbi->inode_lock[DIRTY_META]); + if (inode) { + update_inode_page(inode); + iput(inode); + } + }; + return 0; +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -885,6 +913,14 @@ retry_flush_dents: goto retry_flush_dents; } + if (get_pages(sbi, F2FS_DIRTY_IMETA)) { + f2fs_unlock_all(sbi); + err = f2fs_sync_inode_meta(sbi); + if (err) + goto out; + goto retry_flush_dents; + } + /* * POR: we should ensure that there are no dirty node pages * until finishing nat/sit flush. diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index d89a425055d0..badd407bb622 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -47,6 +47,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA); si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; + si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->wb_bios = atomic_read(&sbi->nr_wb_bios); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; @@ -304,8 +305,8 @@ static int stat_show(struct seq_file *s, void *v) si->inmem_pages, si->wb_bios); seq_printf(s, " - nodes: %4lld in %4d\n", si->ndirty_node, si->node_pages); - seq_printf(s, " - dents: %4lld in dirs:%4d\n", - si->ndirty_dent, si->ndirty_dirs); + seq_printf(s, " - dents: %4lld in dirs:%4d (%4d)\n", + si->ndirty_dent, si->ndirty_dirs, si->ndirty_all); seq_printf(s, " - datas: %4lld in files:%4d\n", si->ndirty_data, si->ndirty_files); seq_printf(s, " - meta: %4lld in %4d\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0534d7a18b35..b541164ce5af 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -442,7 +442,8 @@ struct f2fs_inode_info { nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ - struct list_head dirty_list; /* linked in global dirty list */ + struct list_head dirty_list; /* dirty list for dirs and files */ + struct list_head gdirty_list; /* linked in global dirty list */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ struct extent_tree *extent_tree; /* cached extent_tree entry */ @@ -657,6 +658,7 @@ enum count_type { F2FS_DIRTY_NODES, F2FS_DIRTY_META, F2FS_INMEM_PAGES, + F2FS_DIRTY_IMETA, NR_COUNT_TYPE, }; @@ -707,6 +709,7 @@ struct f2fs_bio_info { enum inode_type { DIR_INODE, /* for dirty dir inode */ FILE_INODE, /* for dirty regular/symlink inode */ + DIRTY_META, /* for all dirtied inode metadata */ NR_INODE_TYPE, }; @@ -1899,6 +1902,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) /* * super.c */ +void f2fs_inode_synced(struct inode *); int f2fs_commit_super(struct f2fs_sb_info *, bool); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) @@ -2010,6 +2014,7 @@ void add_ino_entry(struct f2fs_sb_info *, nid_t, int type); void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type); void release_ino_entry(struct f2fs_sb_info *, bool); bool exist_written_data(struct f2fs_sb_info *, nid_t, int); +int f2fs_sync_inode_meta(struct f2fs_sb_info *); int acquire_orphan_inode(struct f2fs_sb_info *); void release_orphan_inode(struct f2fs_sb_info *); void add_orphan_inode(struct f2fs_sb_info *, nid_t); @@ -2078,7 +2083,7 @@ struct f2fs_stat_info { unsigned long long hit_total, total_ext; int ext_tree, zombie_tree, ext_node; s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, inmem_pages; - unsigned int ndirty_dirs, ndirty_files; + unsigned int ndirty_dirs, ndirty_files, ndirty_all; int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; int bg_gc, wb_bios; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 34aa0949e48c..2d892b6d5632 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -262,7 +262,7 @@ int update_inode(struct inode *inode, struct page *node_page) __set_inode_rdev(inode, ri); set_cold_node(inode, node_page); - clear_inode_flag(inode, FI_DIRTY_INODE); + f2fs_inode_synced(inode); /* deleted inode */ if (inode->i_nlink == 0) @@ -286,6 +286,7 @@ retry: } else if (err != -ENOENT) { f2fs_stop_checkpoint(sbi, false); } + f2fs_inode_synced(inode); return 0; } ret = update_inode(inode, node_page); @@ -360,6 +361,8 @@ retry: goto retry; } + if (err) + update_inode_page(inode); sb_end_intwrite(inode->i_sb); no_delete: stat_dec_inline_xattr(inode); @@ -381,6 +384,8 @@ no_delete: !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); out_clear: fscrypt_put_encryption_info(inode, NULL); + + f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); clear_inode(inode); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0635304c50ac..1965351b644c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -670,6 +670,7 @@ static void truncate_node(struct dnode_of_data *dn) if (dn->nid == dn->inode->i_ino) { remove_orphan_inode(sbi, dn->nid); dec_valid_inode_count(sbi); + f2fs_inode_synced(dn->inode); } else { sync_inode_page(dn); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index fcdd7310b961..5d016a1edf21 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -479,6 +479,8 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + node_secs += get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d832bf4051b5..b5144b81e4c4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -537,6 +537,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) fi->i_advise = 0; init_rwsem(&fi->i_sem); INIT_LIST_HEAD(&fi->dirty_list); + INIT_LIST_HEAD(&fi->gdirty_list); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); @@ -547,6 +548,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) static int f2fs_drop_inode(struct inode *inode) { + int ret; + /* * This is to avoid a deadlock condition like below. * writeback_single_inode(inode) @@ -554,7 +557,7 @@ static int f2fs_drop_inode(struct inode *inode) * - f2fs_gc -> iput -> evict * - inode_wait_for_writeback(inode) */ - if (!inode_unhashed(inode) && inode->i_state & I_SYNC) { + if ((!inode_unhashed(inode) && inode->i_state & I_SYNC)) { if (!inode->i_nlink && !is_bad_inode(inode)) { /* to avoid evict_inode call simultaneously */ atomic_inc(&inode->i_count); @@ -581,7 +584,20 @@ static int f2fs_drop_inode(struct inode *inode) } return 0; } - return generic_drop_inode(inode); + + ret = generic_drop_inode(inode); + if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { + if (ret) + inode->i_state |= I_WILL_FREE; + spin_unlock(&inode->i_lock); + + update_inode_page(inode); + + spin_lock(&inode->i_lock); + if (ret) + inode->i_state &= ~I_WILL_FREE; + } + return ret; } /* @@ -591,7 +607,40 @@ static int f2fs_drop_inode(struct inode *inode) */ static void f2fs_dirty_inode(struct inode *inode, int flags) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_META_INO(sbi)) + return; + + spin_lock(&sbi->inode_lock[DIRTY_META]); + if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { + spin_unlock(&sbi->inode_lock[DIRTY_META]); + return; + } + set_inode_flag(inode, FI_DIRTY_INODE); + list_add_tail(&F2FS_I(inode)->gdirty_list, + &sbi->inode_list[DIRTY_META]); + inc_page_count(sbi, F2FS_DIRTY_IMETA); + spin_unlock(&sbi->inode_lock[DIRTY_META]); + stat_inc_dirty_inode(sbi, DIRTY_META); +} + +void f2fs_inode_synced(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + spin_lock(&sbi->inode_lock[DIRTY_META]); + if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) { + spin_unlock(&sbi->inode_lock[DIRTY_META]); + return; + } + list_del_init(&F2FS_I(inode)->gdirty_list); + clear_inode_flag(inode, FI_DIRTY_INODE); + dec_page_count(sbi, F2FS_DIRTY_IMETA); + spin_unlock(&sbi->inode_lock[DIRTY_META]); + stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META); } static void f2fs_i_callback(struct rcu_head *head) @@ -1757,6 +1806,7 @@ try_onemore: return 0; free_kobj: + f2fs_sync_inode_meta(sbi); kobject_del(&sbi->s_kobj); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); From ee6d182f2a19d5d44607b5ae4bec523726d76a99 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 16:32:49 -0700 Subject: [PATCH 08/68] f2fs: remove syncing inode page in all the cases This patch reduces to call them across the whole tree. - sync_inode_page() - update_inode_page() - update_inode() - f2fs_write_inode() Instead, checkpoint will flush all the dirty inode metadata before syncing node pages. Note that, this is doable, since we call mark_inode_dirty_sync() for all inode's field change which needs to update on-disk inode as well. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +---------- fs/f2fs/dir.c | 24 +++--------------------- fs/f2fs/extent_cache.c | 7 ++----- fs/f2fs/f2fs.h | 2 -- fs/f2fs/file.c | 15 +++------------ fs/f2fs/inline.c | 27 ++++++--------------------- fs/f2fs/namei.c | 15 +-------------- fs/f2fs/node.c | 28 ---------------------------- fs/f2fs/recovery.c | 3 --- fs/f2fs/xattr.c | 6 +----- 10 files changed, 17 insertions(+), 121 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6a4c60c2fd3a..a3dea51f4702 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -343,8 +343,6 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (set_page_dirty(dn->node_page)) dn->node_changed = true; - - sync_inode_page(dn); return 0; } @@ -562,11 +560,8 @@ struct page *get_new_data_page(struct inode *inode, } got_it: if (new_i_size && i_size_read(inode) < - ((loff_t)(index + 1) << PAGE_SHIFT)) { + ((loff_t)(index + 1) << PAGE_SHIFT)) f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); - /* Only the directory inode sets new_i_size */ - set_inode_flag(inode, FI_UPDATE_DIR); - } return page; } @@ -787,8 +782,6 @@ skip: else if (dn.ofs_in_node < end_offset) goto next_block; - if (allocated) - sync_inode_page(&dn); f2fs_put_dnode(&dn); if (create) { @@ -799,8 +792,6 @@ skip: goto next_dnode; sync_out: - if (allocated) - sync_inode_page(&dn); f2fs_put_dnode(&dn); unlock_out: if (create) { diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 384d51cb77bf..24d1308838b5 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -454,19 +454,15 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { if (inode && is_inode_flag_set(inode, FI_NEW_INODE)) { - if (S_ISDIR(inode->i_mode)) { + if (S_ISDIR(inode->i_mode)) f2fs_i_links_write(dir, true); - set_inode_flag(dir, FI_UPDATE_DIR); - } clear_inode_flag(inode, FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty_sync(dir); - if (F2FS_I(dir)->i_current_depth != current_depth) { + if (F2FS_I(dir)->i_current_depth != current_depth) f2fs_i_depth_write(dir, current_depth); - set_inode_flag(dir, FI_UPDATE_DIR); - } if (inode && is_inode_flag_set(inode, FI_INC_LINK)) clear_inode_flag(inode, FI_INC_LINK); @@ -595,9 +591,7 @@ add_dentry: set_page_dirty(dentry_page); if (inode) { - /* we don't need to mark_inode_dirty now */ f2fs_i_pino_write(inode, dir->i_ino); - update_inode(inode, page); f2fs_put_page(page, 1); } @@ -606,10 +600,6 @@ fail: if (inode) up_write(&F2FS_I(inode)->i_sem); - if (is_inode_flag_set(dir, FI_UPDATE_DIR)) { - update_inode_page(dir); - clear_inode_flag(dir, FI_UPDATE_DIR); - } kunmap(dentry_page); f2fs_put_page(dentry_page, 1); @@ -656,8 +646,6 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) err = PTR_ERR(page); goto fail; } - /* we don't need to mark_inode_dirty now */ - update_inode(inode, page); f2fs_put_page(page, 1); clear_inode_flag(inode, FI_NEW_INODE); @@ -673,13 +661,8 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) down_write(&F2FS_I(inode)->i_sem); - if (S_ISDIR(inode->i_mode)) { + if (S_ISDIR(inode->i_mode)) f2fs_i_links_write(dir, false); - if (page) - update_inode(dir, page); - else - update_inode_page(dir); - } inode->i_ctime = CURRENT_TIME; f2fs_i_links_write(inode, false); @@ -688,7 +671,6 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) f2fs_i_size_write(inode, 0); } up_write(&F2FS_I(inode)->i_sem); - update_inode_page(inode); if (inode->i_nlink == 0) add_orphan_inode(sbi, inode->i_ino); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index d21dda607bf2..e858869d76cb 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -689,9 +689,7 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + dn->ofs_in_node; - - if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1)) - sync_inode_page(dn); + f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1); } void f2fs_update_extent_cache_range(struct dnode_of_data *dn, @@ -701,8 +699,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn, if (!f2fs_may_extent_tree(dn->inode)) return; - if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len)) - sync_inode_page(dn); + f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len); } void init_extent_cache_info(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b541164ce5af..2adef0e58461 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1521,7 +1521,6 @@ enum { FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ FI_FREE_NID, /* free allocated nide */ - FI_UPDATE_DIR, /* should update inode block for consistency */ FI_NO_EXTENT, /* not to use the extent cache */ FI_INLINE_XATTR, /* used for inline xattr */ FI_INLINE_DATA, /* used for inline data*/ @@ -1936,7 +1935,6 @@ struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); struct page *get_node_page_ra(struct page *, int); -void sync_inode_page(struct dnode_of_data *); void move_node_page(struct page *, int); int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *, bool); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c5606b1e1a89..73bc946974ad 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -173,12 +173,8 @@ static void try_to_fix_pino(struct inode *inode) get_parent_ino(inode, &pino)) { f2fs_i_pino_write(inode, pino); file_got_pino(inode); - up_write(&fi->i_sem); - - f2fs_write_inode(inode, NULL); - } else { - up_write(&fi->i_sem); } + up_write(&fi->i_sem); } static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, @@ -499,7 +495,6 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) dn->inode) + ofs; f2fs_update_extent_cache_range(dn, fofs, 0, len); dec_valid_block_count(sbi, dn->inode, nr_free); - sync_inode_page(dn); } dn->ofs_in_node = ofs; @@ -1123,10 +1118,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, } out: - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) { + if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) f2fs_i_size_write(inode, new_size); - update_inode_page(inode); - } return ret; } @@ -1232,10 +1225,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; } - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) { + if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) f2fs_i_size_write(inode, new_size); - update_inode_page(inode); - } return ret; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4bc025c29f82..77c9c2439993 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -73,7 +73,7 @@ bool truncate_inline_inode(struct page *ipage, u64 from) f2fs_wait_on_page_writeback(ipage, NODE, true); memset(addr + from, 0, MAX_INLINE_DATA - from); - + set_page_dirty(ipage); return true; } @@ -146,7 +146,6 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) clear_out: stat_dec_inline_inode(dn->inode); f2fs_clear_inline_inode(dn->inode); - sync_inode_page(dn); f2fs_put_dnode(dn); return 0; } @@ -212,11 +211,11 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) dst_addr = inline_data_addr(dn.inode_page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); kunmap_atomic(src_addr); + set_page_dirty(dn.inode_page); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); - sync_inode_page(&dn); clear_inline_node(dn.inode_page); f2fs_put_dnode(&dn); return 0; @@ -255,7 +254,7 @@ process_inline: set_inode_flag(inode, FI_INLINE_DATA); set_inode_flag(inode, FI_DATA_EXIST); - update_inode(inode, ipage); + set_page_dirty(ipage); f2fs_put_page(ipage, 1); return true; } @@ -266,7 +265,6 @@ process_inline: if (!truncate_inline_inode(ipage, 0)) return false; f2fs_clear_inline_inode(inode); - update_inode(inode, ipage); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { if (truncate_blocks(inode, 0, false)) @@ -339,10 +337,8 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, set_page_dirty(ipage); /* update i_size to MAX_INLINE_DATA */ - if (i_size_read(inode) < MAX_INLINE_DATA) { + if (i_size_read(inode) < MAX_INLINE_DATA) f2fs_i_size_write(inode, MAX_INLINE_DATA); - set_inode_flag(inode, FI_UPDATE_DIR); - } return 0; } @@ -401,12 +397,8 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, clear_inode_flag(dir, FI_INLINE_DENTRY); f2fs_i_depth_write(dir, 1); - if (i_size_read(dir) < PAGE_SIZE) { + if (i_size_read(dir) < PAGE_SIZE) f2fs_i_size_write(dir, PAGE_SIZE); - set_inode_flag(dir, FI_UPDATE_DIR); - } - - sync_inode_page(&dn); out: f2fs_put_page(page, 1); return err; @@ -486,7 +478,6 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); - update_inode(dir, ipage); kfree(backup_dentry); return 0; recover: @@ -494,7 +485,7 @@ recover: memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA); f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA); - update_inode(dir, ipage); + set_page_dirty(ipage); f2fs_put_page(ipage, 1); kfree(backup_dentry); @@ -559,7 +550,6 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, /* we don't need to mark_inode_dirty now */ if (inode) { f2fs_i_pino_write(inode, dir->i_ino); - update_inode(inode, page); f2fs_put_page(page, 1); } @@ -567,11 +557,6 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, fail: if (inode) up_write(&F2FS_I(inode)->i_sem); - - if (is_inode_flag_set(dir, FI_UPDATE_DIR)) { - update_inode(dir, ipage); - clear_inode_flag(dir, FI_UPDATE_DIR); - } out: f2fs_put_page(ipage, 1); return err; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index f2b2c4068648..496f4e3018b2 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -706,9 +706,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, add_orphan_inode(sbi, new_inode->i_ino); else release_orphan_inode(sbi); - - update_inode_page(old_inode); - update_inode_page(new_inode); } else { f2fs_balance_fs(sbi, true); @@ -720,10 +717,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_whiteout; } - if (old_dir_entry) { + if (old_dir_entry) f2fs_i_links_write(new_dir, true); - update_inode_page(new_dir); - } /* * old entry and new entry can locate in the same inline @@ -771,13 +766,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dir != new_dir && !whiteout) { f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); - update_inode_page(old_inode); } else { f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } f2fs_i_links_write(old_dir, false); - update_inode_page(old_dir); } f2fs_unlock_op(sbi); @@ -899,8 +892,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, file_lost_pino(old_inode); up_write(&F2FS_I(old_inode)->i_sem); - update_inode_page(old_inode); - old_dir->i_ctime = CURRENT_TIME; if (old_nlink) { down_write(&F2FS_I(old_dir)->i_sem); @@ -908,7 +899,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, up_write(&F2FS_I(old_dir)->i_sem); } mark_inode_dirty_sync(old_dir); - update_inode_page(old_dir); /* update directory entry info of new dir inode */ f2fs_set_link(new_dir, new_entry, new_page, old_inode); @@ -917,8 +907,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, file_lost_pino(new_inode); up_write(&F2FS_I(new_inode)->i_sem); - update_inode_page(new_inode); - new_dir->i_ctime = CURRENT_TIME; if (new_nlink) { down_write(&F2FS_I(new_dir)->i_sem); @@ -926,7 +914,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, up_write(&F2FS_I(new_dir)->i_sem); } mark_inode_dirty_sync(new_dir); - update_inode_page(new_dir); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1965351b644c..82f0f833151e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -671,8 +671,6 @@ static void truncate_node(struct dnode_of_data *dn) remove_orphan_inode(sbi, dn->nid); dec_valid_inode_count(sbi); f2fs_inode_synced(dn->inode); - } else { - sync_inode_page(dn); } invalidate: clear_node_page_dirty(dn->node_page); @@ -1050,14 +1048,8 @@ struct page *new_node_page(struct dnode_of_data *dn, if (f2fs_has_xattr_block(ofs)) f2fs_i_xnid_write(dn->inode, dn->nid); - dn->node_page = page; - if (ipage) - update_inode(dn->inode, ipage); - else - sync_inode_page(dn); if (ofs == 0) inc_valid_inode_count(sbi); - return page; fail: @@ -1176,24 +1168,6 @@ struct page *get_node_page_ra(struct page *parent, int start) return __get_node_page(sbi, nid, parent, start); } -void sync_inode_page(struct dnode_of_data *dn) -{ - int ret = 0; - - if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { - ret = update_inode(dn->inode, dn->node_page); - } else if (dn->inode_page) { - if (!dn->inode_page_locked) - lock_page(dn->inode_page); - ret = update_inode(dn->inode, dn->inode_page); - if (!dn->inode_page_locked) - unlock_page(dn->inode_page); - } else { - ret = update_inode_page(dn->inode); - } - dn->node_changed = ret ? true: false; -} - static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode; @@ -2003,8 +1977,6 @@ recover_xnid: /* 3: update xattr blkaddr */ refresh_sit_entry(sbi, NEW_ADDR, blkaddr); set_node_addr(sbi, &ni, blkaddr, false); - - update_inode_page(inode); } int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 2500b6a5daf0..68c433f17ab5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -490,9 +490,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } } - if (IS_INODE(dn.node_page)) - sync_inode_page(&dn); - copy_node_footer(dn.node_page, page); fill_node_footer(dn.node_page, dn.nid, ni.ino, ofs_of_node(page), false); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 1fe6366a60c1..8c0a3b36a917 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -299,6 +299,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, if (ipage) { inline_addr = inline_xattr_addr(ipage); f2fs_wait_on_page_writeback(ipage, NODE, true); + set_page_dirty(ipage); } else { page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { @@ -546,11 +547,6 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); - - if (ipage) - update_inode(inode, ipage); - else - update_inode_page(inode); mark_inode_dirty_sync(inode); exit: kzfree(base_addr); From 26de9b11713057a16a9220423a2f137774763b0e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 20:42:37 -0700 Subject: [PATCH 09/68] f2fs: avoid unnecessary updating inode during fsync If roll-forward recovery can recover i_size, we don't need to update inode's metadata during fsync. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ fs/f2fs/f2fs.h | 23 +++++++++++++++++++++-- fs/f2fs/file.c | 4 ++-- fs/f2fs/inode.c | 3 +++ fs/f2fs/node.c | 9 +++++++-- fs/f2fs/recovery.c | 3 +++ fs/f2fs/super.c | 4 ++++ 7 files changed, 43 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a3dea51f4702..287582e12f8f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1204,6 +1204,7 @@ static int f2fs_write_data_page(struct page *page, loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long) i_size) >> PAGE_SHIFT; + loff_t psize = (page->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; int err = 0; @@ -1265,6 +1266,8 @@ write: err = f2fs_write_inline_data(inode, page); if (err == -EAGAIN) err = do_write_data_page(&fio); + if (F2FS_I(inode)->last_disk_size < psize) + F2FS_I(inode)->last_disk_size = psize; f2fs_unlock_op(sbi); done: if (err && err != -ENOENT) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2adef0e58461..bf1c8b0ef6c7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -441,6 +441,7 @@ struct f2fs_inode_info { unsigned int clevel; /* maximum level of given file name */ nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ + loff_t last_disk_size; /* lastly written file size */ struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ @@ -1516,6 +1517,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) enum { FI_NEW_INODE, /* indicate newly allocated inode */ FI_DIRTY_INODE, /* indicate inode is dirty or not */ + FI_AUTO_RECOVER, /* indicate inode is recoverable */ FI_DIRTY_DIR, /* indicate directory has dirty pages */ FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ @@ -1591,18 +1593,35 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc) static inline void f2fs_i_blocks_write(struct inode *inode, blkcnt_t diff, bool add) { + bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE); + bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER); + inode->i_blocks = add ? inode->i_blocks + diff : inode->i_blocks - diff; mark_inode_dirty_sync(inode); + if (clean || recover) + set_inode_flag(inode, FI_AUTO_RECOVER); } static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) { + bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE); + bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER); + if (i_size_read(inode) == i_size) return; i_size_write(inode, i_size); mark_inode_dirty_sync(inode); + if (clean || recover) + set_inode_flag(inode, FI_AUTO_RECOVER); +} + +static inline bool f2fs_skip_inode_update(struct inode *inode) +{ + if (!is_inode_flag_set(inode, FI_AUTO_RECOVER)) + return false; + return F2FS_I(inode)->last_disk_size == i_size_read(inode); } static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) @@ -1936,8 +1955,8 @@ void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); struct page *get_node_page_ra(struct page *, int); void move_node_page(struct page *, int); -int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *, - bool); +int fsync_node_pages(struct f2fs_sb_info *, struct inode *, + struct writeback_control *, bool); int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); bool alloc_nid(struct f2fs_sb_info *, nid_t *); void alloc_nid_done(struct f2fs_sb_info *, nid_t); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 73bc946974ad..23decf050236 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -208,7 +208,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, } /* if the inode is dirty, let's recover all the time */ - if (!datasync) { + if (!datasync && !f2fs_skip_inode_update(inode)) { f2fs_write_inode(inode, NULL); goto go_write; } @@ -251,7 +251,7 @@ go_write: goto out; } sync_nodes: - ret = fsync_node_pages(sbi, ino, &wbc, atomic); + ret = fsync_node_pages(sbi, inode, &wbc, atomic); if (ret) goto out; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2d892b6d5632..bdd814db883e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -154,6 +154,9 @@ static int do_read_inode(struct inode *inode) if (__written_first_block(ri)) set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + if (!need_inode_block_update(sbi, inode->i_ino)) + fi->last_disk_size = inode->i_size; + f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 82f0f833151e..641d60392bff 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1293,7 +1293,7 @@ continue_unlock: return last_page; } -int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, +int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic) { pgoff_t index, end; @@ -1301,6 +1301,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, int ret = 0; struct page *last_page = NULL; bool marked = false; + nid_t ino = inode->i_ino; if (atomic) { last_page = last_fsync_dnode(sbi, ino); @@ -1354,9 +1355,13 @@ continue_unlock: if (!atomic || page == last_page) { set_fsync_mark(page, 1); - if (IS_INODE(page)) + if (IS_INODE(page)) { + if (is_inode_flag_set(inode, + FI_DIRTY_INODE)) + update_inode(inode, page); set_dentry_mark(page, need_dentry_mark(sbi, ino)); + } /* may be written by other thread */ if (!PageDirty(page)) set_page_dirty(page); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 68c433f17ab5..b568b28c74f2 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -455,6 +455,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, continue; } + if ((start + 1) << PAGE_SHIFT > i_size_read(inode)) + f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT); + /* * dest is reserved block, invalidate src block * and then reserve one new block in dnode page. diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b5144b81e4c4..6fa4ec8ea1f7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -613,6 +613,9 @@ static void f2fs_dirty_inode(struct inode *inode, int flags) inode->i_ino == F2FS_META_INO(sbi)) return; + if (is_inode_flag_set(inode, FI_AUTO_RECOVER)) + clear_inode_flag(inode, FI_AUTO_RECOVER); + spin_lock(&sbi->inode_lock[DIRTY_META]); if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { spin_unlock(&sbi->inode_lock[DIRTY_META]); @@ -638,6 +641,7 @@ void f2fs_inode_synced(struct inode *inode) } list_del_init(&F2FS_I(inode)->gdirty_list); clear_inode_flag(inode, FI_DIRTY_INODE); + clear_inode_flag(inode, FI_AUTO_RECOVER); dec_page_count(sbi, F2FS_DIRTY_IMETA); spin_unlock(&sbi->inode_lock[DIRTY_META]); stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META); From 6d94c74ab85fe70dc1ac29b1ffc55cf23b3cf3f9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 21:47:24 -0700 Subject: [PATCH 10/68] f2fs: add lazytime mount option This patch adds lazytime support. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6fa4ec8ea1f7..14c34735e96f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -93,6 +93,8 @@ enum { Opt_noinline_data, Opt_data_flush, Opt_fault_injection, + Opt_lazytime, + Opt_nolazytime, Opt_err, }; @@ -119,6 +121,8 @@ static match_table_t f2fs_tokens = { {Opt_noinline_data, "noinline_data"}, {Opt_data_flush, "data_flush"}, {Opt_fault_injection, "fault_injection=%u"}, + {Opt_lazytime, "lazytime"}, + {Opt_nolazytime, "nolazytime"}, {Opt_err, NULL}, }; @@ -506,6 +510,12 @@ static int parse_options(struct super_block *sb, char *options) "FAULT_INJECTION was not selected"); #endif break; + case Opt_lazytime: + sb->s_flags |= MS_LAZYTIME; + break; + case Opt_nolazytime: + sb->s_flags &= ~MS_LAZYTIME; + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -613,6 +623,9 @@ static void f2fs_dirty_inode(struct inode *inode, int flags) inode->i_ino == F2FS_META_INO(sbi)) return; + if (flags == I_DIRTY_TIME) + return; + if (is_inode_flag_set(inode, FI_AUTO_RECOVER)) clear_inode_flag(inode, FI_AUTO_RECOVER); @@ -932,6 +945,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, BG_GC); set_opt(sbi, INLINE_DATA); set_opt(sbi, EXTENT_CACHE); + sbi->sb->s_flags |= MS_LAZYTIME; #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); From 0a87f664d1ad29cc4712303d2142fe386368f07d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 23 May 2016 12:04:56 -0700 Subject: [PATCH 11/68] f2fs: detect congestion of flush command issues If flush commands do not incur any congestion, we don't need to throw that to dispatching queue which causes unnecessary latency. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bf1c8b0ef6c7..f02a3572fff3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -603,6 +603,7 @@ struct flush_cmd { struct flush_cmd_control { struct task_struct *f2fs_issue_flush; /* flush thread */ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ + atomic_t submit_flush; /* # of issued flushes */ struct llist_head issue_list; /* list for command issue */ struct llist_node *dispatch_list; /* list for command dispatch */ }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 77dc929715cf..34a9159cf5ac 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -433,24 +433,28 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) if (test_opt(sbi, NOBARRIER)) return 0; - if (!test_opt(sbi, FLUSH_MERGE)) { + if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) { struct bio *bio = f2fs_bio_alloc(0); int ret; + atomic_inc(&fcc->submit_flush); bio->bi_bdev = sbi->sb->s_bdev; ret = submit_bio_wait(WRITE_FLUSH, bio); + atomic_dec(&fcc->submit_flush); bio_put(bio); return ret; } init_completion(&cmd.wait); + atomic_inc(&fcc->submit_flush); llist_add(&cmd.llnode, &fcc->issue_list); if (!fcc->dispatch_list) wake_up(&fcc->flush_wait_queue); wait_for_completion(&cmd.wait); + atomic_dec(&fcc->submit_flush); return cmd.ret; } @@ -464,6 +468,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); if (!fcc) return -ENOMEM; + atomic_set(&fcc->submit_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->cmd_control_info = fcc; From 69e9e4274450c778416bb1ff21e5c6f6276ff418 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 22:39:20 -0700 Subject: [PATCH 12/68] f2fs: set flush_merge by default This patch sets flush_merge by default. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 14c34735e96f..55203f6a4e8b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -86,6 +86,7 @@ enum { Opt_inline_data, Opt_inline_dentry, Opt_flush_merge, + Opt_noflush_merge, Opt_nobarrier, Opt_fastboot, Opt_extent_cache, @@ -114,6 +115,7 @@ static match_table_t f2fs_tokens = { {Opt_inline_data, "inline_data"}, {Opt_inline_dentry, "inline_dentry"}, {Opt_flush_merge, "flush_merge"}, + {Opt_noflush_merge, "noflush_merge"}, {Opt_nobarrier, "nobarrier"}, {Opt_fastboot, "fastboot"}, {Opt_extent_cache, "extent_cache"}, @@ -482,6 +484,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_flush_merge: set_opt(sbi, FLUSH_MERGE); break; + case Opt_noflush_merge: + clear_opt(sbi, FLUSH_MERGE); + break; case Opt_nobarrier: set_opt(sbi, NOBARRIER); break; @@ -946,6 +951,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, INLINE_DATA); set_opt(sbi, EXTENT_CACHE); sbi->sb->s_flags |= MS_LAZYTIME; + set_opt(sbi, FLUSH_MERGE); #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); From b93f7712868648c0529eed6b568cea1493d3d9f9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 22:50:29 -0700 Subject: [PATCH 13/68] f2fs: remove writepages lock This patch removes writepages lock. We can improve multi-threading performance. tiobench, 32 threads, 4KB write per fsync on SSD Before: 25.88 MB/s After: 28.03 MB/s Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 ------- fs/f2fs/f2fs.h | 1 - fs/f2fs/super.c | 1 - 3 files changed, 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 287582e12f8f..7132b0202860 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1443,7 +1443,6 @@ static int f2fs_write_data_pages(struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool locked = false; int ret; long diff; @@ -1472,14 +1471,8 @@ static int f2fs_write_data_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, DATA, wbc); - if (!S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_ALL) { - mutex_lock(&sbi->writepages); - locked = true; - } ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE); - if (locked) - mutex_unlock(&sbi->writepages); remove_dirty_inode(inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f02a3572fff3..4365108d40b3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -770,7 +770,6 @@ struct f2fs_sb_info { struct mutex cp_mutex; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ - struct mutex writepages; /* mutex for writepages() */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ long interval_time[MAX_TIME]; /* to store thresholds */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 55203f6a4e8b..a5b57390079c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1630,7 +1630,6 @@ try_onemore: sbi->raw_super = raw_super; sbi->valid_super_block = valid_super_block; mutex_init(&sbi->gc_mutex); - mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); From 42d964016e2760d75144f41cc78ad8bdca8dc240 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 25 May 2016 14:29:11 -0700 Subject: [PATCH 14/68] f2fs: propagate error given by f2fs_find_entry If we get ENOMEM or EIO in f2fs_find_entry, we should stop right away. Otherwise, for example, we can get duplicate directory entry by ->chash and ->clevel. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 23 ++++++++++++++++------- fs/f2fs/inline.c | 4 +++- fs/f2fs/namei.c | 5 +++++ 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 24d1308838b5..ae37543f3840 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -185,8 +185,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, /* no need to allocate new dentry pages to all the indices */ dentry_page = find_data_page(dir, bidx); if (IS_ERR(dentry_page)) { - room = true; - continue; + if (PTR_ERR(dentry_page) == -ENOENT) { + room = true; + continue; + } else { + *res_page = dentry_page; + break; + } } de = find_in_block(dentry_page, fname, namehash, &max_slots, @@ -223,19 +228,22 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct fscrypt_name fname; int err; - *res_page = NULL; - err = fscrypt_setup_filename(dir, child, 1, &fname); - if (err) + if (err) { + *res_page = ERR_PTR(-ENOMEM); return NULL; + } if (f2fs_has_inline_dentry(dir)) { + *res_page = NULL; de = find_in_inline_dir(dir, &fname, res_page); goto out; } - if (npages == 0) + if (npages == 0) { + *res_page = NULL; goto out; + } max_depth = F2FS_I(dir)->i_current_depth; if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) { @@ -247,8 +255,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, } for (level = 0; level < max_depth; level++) { + *res_page = NULL; de = find_in_level(dir, level, &fname, res_page); - if (de) + if (de || IS_ERR(*res_page)) break; } out: diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 77c9c2439993..1eb30431bf44 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -286,8 +286,10 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, f2fs_hash_t namehash; ipage = get_node_page(sbi, dir->i_ino); - if (IS_ERR(ipage)) + if (IS_ERR(ipage)) { + *res_page = ipage; return NULL; + } namehash = f2fs_dentry_hash(&name); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 496f4e3018b2..3f6119e94a78 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -232,6 +232,9 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) if (de) { f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); + } else if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out; } else { err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); if (err) @@ -242,6 +245,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) if (de) { f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); + } else if (IS_ERR(page)) { + err = PTR_ERR(page); } else { err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); } From 53aa6bbfdaae6fe71762e66e23c0fda1207beb2b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 25 May 2016 15:24:18 -0700 Subject: [PATCH 15/68] f2fs: inject to produce some orphan inodes Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/inode.c | 5 +++++ fs/f2fs/super.c | 1 + 3 files changed, 9 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4365108d40b3..4fec9e7038b6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -45,6 +45,7 @@ enum { FAULT_ORPHAN, FAULT_BLOCK, FAULT_DIR_DEPTH, + FAULT_EVICT_INODE, FAULT_MAX, }; @@ -74,6 +75,8 @@ static inline bool time_to_inject(int type) return false; else if (type == FAULT_DIR_DEPTH && !IS_FAULT_SET(type)) return false; + else if (type == FAULT_EVICT_INODE && !IS_FAULT_SET(type)) + return false; atomic_inc(&f2fs_fault.inject_ops); if (atomic_read(&f2fs_fault.inject_ops) >= f2fs_fault.inject_rate) { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index bdd814db883e..11cb60af815d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -345,6 +345,11 @@ void f2fs_evict_inode(struct inode *inode) if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(FAULT_EVICT_INODE)) + goto no_delete; +#endif + sb_start_intwrite(inode->i_sb); set_inode_flag(inode, FI_NO_ALLOC); i_size_write(inode, 0); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a5b57390079c..27f76819eef1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -49,6 +49,7 @@ char *fault_name[FAULT_MAX] = { [FAULT_ORPHAN] = "orphan", [FAULT_BLOCK] = "no more block", [FAULT_DIR_DEPTH] = "too big dir depth", + [FAULT_EVICT_INODE] = "evict_inode fail", }; static void f2fs_build_fault_attr(unsigned int rate) From 28ea6162e29ba0db87a512dda2bb6d6e63a6006f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 25 May 2016 17:17:56 -0700 Subject: [PATCH 16/68] f2fs: do not skip writing data pages For data pages, let's try to flush as much as possible in background. On /dev/pmem0, 1. dd if=/dev/zero of=/mnt/test/testfile bs=1M count=2048 conv=fsync Before : 800 MB/s After : 1.1 GB/s 2. dd if=/dev/zero of=/mnt/test/testfile bs=1M count=2048 Before : 1.3 GB/s After : 2.2 GB/s Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++------ fs/f2fs/segment.h | 4 +--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7132b0202860..85ceb2be4ac9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1444,7 +1444,6 @@ static int f2fs_write_data_pages(struct address_space *mapping, struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int ret; - long diff; /* deal with chardevs and other special file */ if (!mapping->a_ops->writepage) @@ -1469,14 +1468,14 @@ static int f2fs_write_data_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, DATA); - diff = nr_pages_to_write(sbi, DATA, wbc); - ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); - f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE); + /* + * if some pages were truncated, we cannot guarantee its mapping->host + * to detect pending bios. + */ + f2fs_submit_merged_bio(sbi, DATA, WRITE); remove_dirty_inode(inode); - - wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); return ret; skip_write: diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 5d016a1edf21..890bb28d2082 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -728,9 +728,7 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, nr_to_write = wbc->nr_to_write; - if (type == DATA) - desired = 4096; - else if (type == NODE) + if (type == NODE) desired = 3 * max_hw_blocks(sbi); else desired = MAX_BIO_BLOCKS(sbi); From 46ae957f9b1611be2935ae626f601cda74f8160e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 25 May 2016 20:57:16 -0700 Subject: [PATCH 17/68] f2fs: remove two steps to flush dirty data pages If there is no cold page, we don't need to do a loop to flush dirty data pages. On /dev/pmem0, 1. dd if=/dev/zero of=/mnt/test/testfile bs=1M count=2048 conv=fsync Before : 1.1 GB/s After : 1.2 GB/s 2. dd if=/dev/zero of=/mnt/test/testfile bs=1M count=2048 Before : 2.2 GB/s After : 2.3 GB/s Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 85ceb2be4ac9..5dcd8dbe2064 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1326,10 +1326,9 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int cycled; int range_whole = 0; int tag; - int step = 0; pagevec_init(&pvec, 0); -next: + if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; @@ -1384,9 +1383,6 @@ continue_unlock: goto continue_unlock; } - if (step == is_cold_data(page)) - goto continue_unlock; - if (PageWriteback(page)) { if (wbc->sync_mode != WB_SYNC_NONE) f2fs_wait_on_page_writeback(page, @@ -1421,11 +1417,6 @@ continue_unlock: cond_resched(); } - if (step < 1) { - step++; - goto next; - } - if (!cycled && !done) { cycled = 1; index = 0; From 0c9df7fb80360802f241428be7104f79d7c0f4ee Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Thu, 26 May 2016 19:40:29 +0800 Subject: [PATCH 18/68] f2fs: return the errno to the caller to avoid using a wrong page Commit aaf9607516ed38825268515ef4d773289a44f429 ("f2fs: check node page contents all the time") pointed out that "sometimes it was reported that its contents was missing", so it checks the page's mapping and contents. When "nid != nid_of_node(page)", ERR_PTR(-EIO) will be returned to the caller. However, commit e1c51b9f1df2f9efc2ec11488717e40cd12015f9 ("f2fs: clean up node page updating flow") moves "nid != nid_of_node(page)" test to "f2fs_bug_on(sbi, nid != nid_of_node(page))", this will return a wrong page to the caller when F2FS_CHECK_FS is off when "sometimes it was reported that its contents was missing" happens. This patch restores to check node page contents all the time, and returns the errno to make the caller known something is wrong and avoid to use the page. This patch also moves f2fs_bug_on to its proper location. Signed-off-by: Yunlong Song Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 641d60392bff..16532b31dcd6 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1142,16 +1142,21 @@ repeat: lock_page(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } + if (unlikely(!PageUptodate(page))) + goto out_err; + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { f2fs_put_page(page, 1); goto repeat; } page_hit: - f2fs_bug_on(sbi, nid != nid_of_node(page)); + if(unlikely(nid != nid_of_node(page))) { + f2fs_bug_on(sbi, 1); + ClearPageUptodate(page); +out_err: + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } return page; } From eb4246dc12da4a0369a91c10c2dfe8e2b0955818 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 27 May 2016 10:10:41 -0700 Subject: [PATCH 19/68] f2fs: return error of f2fs_lookup Now we can report an error to f2fs_lookup given by f2fs_find_entry. Suggested-by: He YunLei Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- fs/f2fs/namei.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index ae37543f3840..6fbb1ed182f6 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -230,7 +230,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, err = fscrypt_setup_filename(dir, child, 1, &fname); if (err) { - *res_page = ERR_PTR(-ENOMEM); + *res_page = ERR_PTR(err); return NULL; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 3f6119e94a78..78efe00a3a2f 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -287,8 +287,11 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENAMETOOLONG); de = f2fs_find_entry(dir, &dentry->d_name, &page); - if (!de) + if (!de) { + if (IS_ERR(page)) + return (struct dentry *)page; return d_splice_alias(inode, dentry); + } ino = le32_to_cpu(de->ino); f2fs_dentry_kunmap(dir, page); From b230e6cabf9e77e210fe7990fea12f8894af0fc1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 29 May 2016 21:18:23 -0700 Subject: [PATCH 20/68] f2fs: handle writepage correctly Previously, f2fs_write_data_pages() calls __f2fs_writepage() which calls f2fs_write_data_page(). If f2fs_write_data_page() returns AOP_WRITEPAGE_ACTIVATE, __f2fs_writepage() calls mapping_set_error(). But, this should not happen at every time, since sometimes f2fs_write_data_page() tries to skip writing pages without error. For example, volatile_write() gives EIO all the time, as Shuoran Liu pointed out. Reported-by: Shuoran Liu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5dcd8dbe2064..c9d6fe28ae1e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1241,20 +1241,18 @@ write: available_free_memory(sbi, BASE_CHECK)))) goto redirty_out; - /* Dentry blocks are controlled by checkpoint */ - if (S_ISDIR(inode->i_mode)) { - if (unlikely(f2fs_cp_error(sbi))) - goto redirty_out; - err = do_write_data_page(&fio); - goto done; - } - /* we should bypass data pages to proceed the kworkder jobs */ if (unlikely(f2fs_cp_error(sbi))) { SetPageError(page); goto out; } + /* Dentry blocks are controlled by checkpoint */ + if (S_ISDIR(inode->i_mode)) { + err = do_write_data_page(&fio); + goto done; + } + if (!wbc->for_reclaim) need_balance_fs = true; else if (has_not_enough_free_secs(sbi, 0)) @@ -1294,16 +1292,8 @@ out: redirty_out: redirty_page_for_writepage(wbc, page); - return AOP_WRITEPAGE_ACTIVATE; -} - -static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, - void *data) -{ - struct address_space *mapping = data; - int ret = mapping->a_ops->writepage(page, wbc); - mapping_set_error(mapping, ret); - return ret; + unlock_page(page); + return err; } /* @@ -1312,8 +1302,7 @@ static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, * warm/hot data page. */ static int f2fs_write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, writepage_t writepage, - void *data) + struct writeback_control *wbc) { int ret = 0; int done = 0; @@ -1395,16 +1384,11 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = (*writepage)(page, wbc, data); + ret = mapping->a_ops->writepage(page, wbc); if (unlikely(ret)) { - if (ret == AOP_WRITEPAGE_ACTIVATE) { - unlock_page(page); - ret = 0; - } else { - done_index = page->index + 1; - done = 1; - break; - } + done_index = page->index + 1; + done = 1; + break; } if (--wbc->nr_to_write <= 0 && @@ -1459,7 +1443,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, DATA); - ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); + ret = f2fs_write_cache_pages(mapping, wbc); /* * if some pages were truncated, we cannot guarantee its mapping->host * to detect pending bios. From 9f7c45ccd6d434e2778dd71fb825a7918979fc17 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 1 Jun 2016 21:18:25 -0700 Subject: [PATCH 21/68] f2fs: remove deprecated parameter Remove deprecated paramter. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 4 ++-- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inline.c | 5 ++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 6fbb1ed182f6..f6ab3c2f8145 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -664,7 +664,7 @@ fail: return err; } -void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) +void f2fs_drop_nlink(struct inode *dir, struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); @@ -723,7 +723,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, mark_inode_dirty_sync(dir); if (inode) - f2fs_drop_nlink(dir, inode, NULL); + f2fs_drop_nlink(dir, inode); if (bit_pos == NR_DENTRY_IN_BLOCK && !truncate_hole(dir, page->index, page->index + 1)) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4fec9e7038b6..dc9e2798ba9c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1895,7 +1895,7 @@ struct page *init_inode_metadata(struct inode *, struct inode *, const struct qstr *, struct page *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); int room_for_filename(const void *, int, int); -void f2fs_drop_nlink(struct inode *, struct inode *, struct page *); +void f2fs_drop_nlink(struct inode *, struct inode *); struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, struct page **); struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 1eb30431bf44..669f92ffd111 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -582,14 +582,13 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, &inline_dentry->dentry_bitmap); set_page_dirty(page); + f2fs_put_page(page, 1); dir->i_ctime = dir->i_mtime = CURRENT_TIME; mark_inode_dirty_sync(dir); if (inode) - f2fs_drop_nlink(dir, inode, page); - - f2fs_put_page(page, 1); + f2fs_drop_nlink(dir, inode); } bool f2fs_empty_inline_dir(struct inode *dir) From 338bbfa086c34ed4ec33bd3c4c4a2ec89336a567 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 Jun 2016 11:08:56 -0700 Subject: [PATCH 22/68] f2fs: avoid wrong count on dirty inodes The number should be covered by spin_lock. Otherwise we can see wrong count in f2fs_stat. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 27f76819eef1..9e754336e4e5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -645,8 +645,8 @@ static void f2fs_dirty_inode(struct inode *inode, int flags) list_add_tail(&F2FS_I(inode)->gdirty_list, &sbi->inode_list[DIRTY_META]); inc_page_count(sbi, F2FS_DIRTY_IMETA); - spin_unlock(&sbi->inode_lock[DIRTY_META]); stat_inc_dirty_inode(sbi, DIRTY_META); + spin_unlock(&sbi->inode_lock[DIRTY_META]); } void f2fs_inode_synced(struct inode *inode) @@ -662,8 +662,8 @@ void f2fs_inode_synced(struct inode *inode) clear_inode_flag(inode, FI_DIRTY_INODE); clear_inode_flag(inode, FI_AUTO_RECOVER); dec_page_count(sbi, F2FS_DIRTY_IMETA); - spin_unlock(&sbi->inode_lock[DIRTY_META]); stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META); + spin_unlock(&sbi->inode_lock[DIRTY_META]); } static void f2fs_i_callback(struct rcu_head *head) From 9a449e9c3b34ef3f7029bd966f98cbbfccd144e5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 Jun 2016 13:49:38 -0700 Subject: [PATCH 23/68] f2fs: remove obsolete parameter in f2fs_truncate We don't need lock parameter, which is always true. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 6 +++--- fs/f2fs/inode.c | 2 +- fs/f2fs/super.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dc9e2798ba9c..e21f9df133d1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1854,7 +1854,7 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags) int f2fs_sync_file(struct file *, loff_t, loff_t, int); void truncate_data_blocks(struct dnode_of_data *); int truncate_blocks(struct inode *, u64, bool); -int f2fs_truncate(struct inode *, bool); +int f2fs_truncate(struct inode *); int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); int f2fs_setattr(struct dentry *, struct iattr *); int truncate_hole(struct inode *, pgoff_t, pgoff_t); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 23decf050236..7a8d262bc488 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -608,7 +608,7 @@ free_partial: return err; } -int f2fs_truncate(struct inode *inode, bool lock) +int f2fs_truncate(struct inode *inode) { int err; @@ -625,7 +625,7 @@ int f2fs_truncate(struct inode *inode, bool lock) return err; } - err = truncate_blocks(inode, i_size_read(inode), lock); + err = truncate_blocks(inode, i_size_read(inode), true); if (err) return err; @@ -689,7 +689,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_size <= i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); - err = f2fs_truncate(inode, true); + err = f2fs_truncate(inode); if (err) return err; f2fs_balance_fs(F2FS_I_SB(inode), true); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 11cb60af815d..fb3d9bd597e3 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -355,7 +355,7 @@ void f2fs_evict_inode(struct inode *inode) i_size_write(inode, 0); retry: if (F2FS_HAS_BLOCKS(inode)) - err = f2fs_truncate(inode, true); + err = f2fs_truncate(inode); if (!err) { f2fs_lock_op(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9e754336e4e5..94bb87dd4b31 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -590,7 +590,7 @@ static int f2fs_drop_inode(struct inode *inode) f2fs_i_size_write(inode, 0); if (F2FS_HAS_BLOCKS(inode)) - f2fs_truncate(inode, true); + f2fs_truncate(inode); sb_end_intwrite(inode->i_sb); From 1e7c48fa9a34556639328d50780d12a304e0bb6d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 Jun 2016 14:15:56 -0700 Subject: [PATCH 24/68] f2fs: avoid data race between FI_DIRTY_INODE flag and update_inode FI_DIRTY_INODE flag is not covered by inode page lock, so it can be unset at any time like below. Thread #1 Thread #2 - lock_page(ipage) - update i_fields - update i_size/i_blocks/and so on - set FI_DIRTY_INODE - reset FI_DIRTY_INODE - set_page_dirty(ipage) In this case, we can lose the latest i_field information. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index fb3d9bd597e3..63c432673c71 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -231,6 +231,8 @@ int update_inode(struct inode *inode, struct page *node_page) { struct f2fs_inode *ri; + f2fs_inode_synced(inode); + f2fs_wait_on_page_writeback(node_page, NODE, true); ri = F2FS_INODE(node_page); @@ -265,7 +267,6 @@ int update_inode(struct inode *inode, struct page *node_page) __set_inode_rdev(inode, ri); set_cold_node(inode, node_page); - f2fs_inode_synced(inode); /* deleted inode */ if (inode->i_nlink == 0) From 29710bcf9426c84bb6a9b1d94316895ed6143813 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 Jun 2016 15:26:27 -0700 Subject: [PATCH 25/68] f2fs: fix wrong percentage This should be 1%, 10MB / 1GB. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 1f4f9d4569d9..2c2a797e18a8 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -23,7 +23,7 @@ #define MAX_RA_NODE 128 /* control the memory footprint threshold (10MB per 1GB ram) */ -#define DEF_RAM_THRESHOLD 10 +#define DEF_RAM_THRESHOLD 1 /* control dirty nats ratio threshold (default: 10% over max nid count) */ #define DEF_DIRTY_NAT_RATIO_THRESHOLD 10 From e589c2c477b44e06754508a4e8b883e5ae7294aa Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 Jun 2016 15:24:24 -0700 Subject: [PATCH 26/68] f2fs: control not to exceed # of cached nat entries This is to avoid cache entry management overhead including radix tree. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++++ fs/f2fs/node.h | 7 +++++++ fs/f2fs/segment.c | 5 +++++ 3 files changed, 16 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 16532b31dcd6..b448c8fec7fc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -52,6 +52,10 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); + if (excess_cached_nats(sbi)) + res = false; + if (nm_i->nat_cnt > DEF_NAT_CACHE_THRESHOLD) + res = false; } else if (type == DIRTY_DENTS) { if (sbi->sb->s_bdi->wb.dirty_exceeded) return false; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 2c2a797e18a8..673ce926cf09 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -27,6 +27,8 @@ /* control dirty nats ratio threshold (default: 10% over max nid count) */ #define DEF_DIRTY_NAT_RATIO_THRESHOLD 10 +/* control total # of nats */ +#define DEF_NAT_CACHE_THRESHOLD 100000 /* vector size for gang look-up from nat cache that consists of radix tree */ #define NATVEC_SIZE 64 @@ -126,6 +128,11 @@ static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi) NM_I(sbi)->dirty_nats_ratio / 100; } +static inline bool excess_cached_nats(struct f2fs_sb_info *sbi) +{ + return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD; +} + enum mem_type { FREE_NIDS, /* indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 34a9159cf5ac..9011bffd1dd0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -345,6 +345,11 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { if (!need) return; + + /* balance_fs_bg is able to be pending */ + if (excess_cached_nats(sbi)) + f2fs_balance_fs_bg(sbi); + /* * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. From 7f319975ccea80cf03377ff579b0a9e613308570 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 3 Jun 2016 12:28:26 -0700 Subject: [PATCH 27/68] f2fs: set mapping error for EIO If EIO occurred, we need to set all the mapping to avoid any further IOs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c9d6fe28ae1e..30dc448cae60 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1243,7 +1243,7 @@ write: /* we should bypass data pages to proceed the kworkder jobs */ if (unlikely(f2fs_cp_error(sbi))) { - SetPageError(page); + mapping_set_error(page->mapping, -EIO); goto out; } From 7dfeaa32204841aade36ba243a1cb45c54f42c15 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 4 Jun 2016 14:21:28 -0700 Subject: [PATCH 28/68] f2fs: avoid reverse IO order for NODE and DATA There is a data race between allocate_data_block() and f2fs_sbumit_page_mbio(), which incur unnecessary reversed bio submission. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 6 ++++++ fs/f2fs/super.c | 2 ++ 3 files changed, 9 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e21f9df133d1..24c7cde84905 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -766,6 +766,7 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info read_io; /* for read bios */ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ + struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9011bffd1dd0..7b58bfbd84a3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1399,11 +1399,17 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio->page, fio->type); + if (fio->type == NODE || fio->type == DATA) + mutex_lock(&fio->sbi->wio_mutex[fio->type]); + allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type); /* writeout dirty page into bdev */ f2fs_submit_page_mbio(fio); + + if (fio->type == NODE || fio->type == DATA) + mutex_unlock(&fio->sbi->wio_mutex[fio->type]); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 94bb87dd4b31..dc66f1623e06 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1436,6 +1436,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); + mutex_init(&sbi->wio_mutex[NODE]); + mutex_init(&sbi->wio_mutex[DATA]); #ifdef CONFIG_F2FS_FS_ENCRYPTION memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX, From 19a5f5e2ef37f032efd840ada257bce2e91c8066 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 4 Jun 2016 14:25:24 -0700 Subject: [PATCH 29/68] f2fs: drop any block plugging In f2fs, we don't need to keep block plugging for NODE and DATA writes, since we already merged bios as much as possible. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ---- fs/f2fs/data.c | 17 ++++++++++------- fs/f2fs/gc.c | 5 ----- fs/f2fs/segment.c | 7 +------ 4 files changed, 11 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5ddd15cd6c6c..4179c7b971fc 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -897,11 +897,8 @@ static int block_operations(struct f2fs_sb_info *sbi) .nr_to_write = LONG_MAX, .for_reclaim = 0, }; - struct blk_plug plug; int err = 0; - blk_start_plug(&plug); - retry_flush_dents: f2fs_lock_all(sbi); /* write all the dirty dentry pages */ @@ -938,7 +935,6 @@ retry_flush_nodes: goto retry_flush_nodes; } out: - blk_finish_plug(&plug); return err; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 30dc448cae60..5f655d0c5b1f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -98,10 +98,13 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, } static inline void __submit_bio(struct f2fs_sb_info *sbi, int rw, - struct bio *bio) + struct bio *bio, enum page_type type) { - if (!is_read_io(rw)) + if (!is_read_io(rw)) { atomic_inc(&sbi->nr_wb_bios); + if (current->plug && (type == DATA || type == NODE)) + blk_finish_plug(current->plug); + } submit_bio(rw, bio); } @@ -117,7 +120,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) else trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); - __submit_bio(io->sbi, fio->rw, io->bio); + __submit_bio(io->sbi, fio->rw, io->bio, fio->type); io->bio = NULL; } @@ -235,7 +238,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) return -EFAULT; } - __submit_bio(fio->sbi, fio->rw, bio); + __submit_bio(fio->sbi, fio->rw, bio, fio->type); return 0; } @@ -1040,7 +1043,7 @@ got_it: */ if (bio && (last_block_in_bio != block_nr - 1)) { submit_and_realloc: - __submit_bio(F2FS_I_SB(inode), READ, bio); + __submit_bio(F2FS_I_SB(inode), READ, bio, DATA); bio = NULL; } if (bio == NULL) { @@ -1083,7 +1086,7 @@ set_error_page: goto next_page; confused: if (bio) { - __submit_bio(F2FS_I_SB(inode), READ, bio); + __submit_bio(F2FS_I_SB(inode), READ, bio, DATA); bio = NULL; } unlock_page(page); @@ -1093,7 +1096,7 @@ next_page: } BUG_ON(pages && !list_empty(pages)); if (bio) - __submit_bio(F2FS_I_SB(inode), READ, bio); + __submit_bio(F2FS_I_SB(inode), READ, bio, DATA); return 0; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 4a03076074af..67fd2855ccc9 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -777,7 +777,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, { struct page *sum_page; struct f2fs_summary_block *sum; - struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + sbi->segs_per_sec; int seg_freed = 0; @@ -795,8 +794,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unlock_page(sum_page); } - blk_start_plug(&plug); - for (segno = start_segno; segno < end_segno; segno++) { /* find segment summary of victim */ sum_page = find_get_page(META_MAPPING(sbi), @@ -830,8 +827,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, f2fs_submit_merged_bio(sbi, (type == SUM_TYPE_NODE) ? NODE : DATA, WRITE); - blk_finish_plug(&plug); - if (gc_type == FG_GC) { while (start_segno < end_segno) if (get_valid_blocks(sbi, start_segno++, 1) == 0) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7b58bfbd84a3..eff046a792ad 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -379,13 +379,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) excess_prefree_segs(sbi) || excess_dirty_nats(sbi) || (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) { - if (test_opt(sbi, DATA_FLUSH)) { - struct blk_plug plug; - - blk_start_plug(&plug); + if (test_opt(sbi, DATA_FLUSH)) sync_dirty_inodes(sbi, FILE_INODE); - blk_finish_plug(&plug); - } f2fs_sync_fs(sbi->sb, true); stat_inc_bg_cp_count(sbi->stat_info); } From aa987273290d206b298e9d09db83e32ead661098 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 6 Jun 2016 18:49:54 -0700 Subject: [PATCH 30/68] f2fs: skip clean segment for gc If a segment in a section is clean or prefreed, we don't need to get its summary and do gc. Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 67fd2855ccc9..e1d274cdecb8 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -795,6 +795,10 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, } for (segno = start_segno; segno < end_segno; segno++) { + + if (get_valid_blocks(sbi, segno, 1) == 0) + continue; + /* find segment summary of victim */ sum_page = find_get_page(META_MAPPING(sbi), GET_SUM_BLOCK(sbi, segno)); From 36abef4e796d382e81a0c2d21ea5327481dd7154 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 3 Jun 2016 19:29:38 -0700 Subject: [PATCH 31/68] f2fs: introduce mode=lfs mount option This mount option is to enable original log-structured filesystem forcefully. So, there should be no random writes for main area. Especially, this supports host-managed SMR device. Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 3 +++ fs/f2fs/checkpoint.c | 2 +- fs/f2fs/data.c | 2 ++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 8 +++++++- fs/f2fs/recovery.c | 6 +++++- fs/f2fs/segment.c | 20 +++++++++++++++++++- fs/f2fs/segment.h | 7 +++++++ fs/f2fs/super.c | 28 ++++++++++++++++++++++++++++ 9 files changed, 74 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index e1c9f0849da6..3a5ce24021d9 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -151,6 +151,9 @@ noinline_data Disable the inline data feature, inline data feature is enabled by default. data_flush Enable data flushing before checkpoint in order to persist data of regular and symlink. +mode=%s Control block allocation mode which supports "adaptive" + and "lfs". In "lfs" mode, there should be no random + writes towards main area. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 4179c7b971fc..837e6bcad5ce 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -981,7 +981,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) * This avoids to conduct wrong roll-forward operations and uses * metapages, so should be called prior to sync_meta_pages below. */ - if (discard_next_dnode(sbi, discard_blk)) + if (!test_opt(sbi, LFS) && discard_next_dnode(sbi, discard_blk)) invalidate = true; /* Flush all the NAT/SIT pages */ diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5f655d0c5b1f..607ef4397330 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1710,6 +1710,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) return 0; + if (test_opt(F2FS_I_SB(inode), LFS)) + return 0; trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 24c7cde84905..82acdec022ef 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -111,6 +111,8 @@ static inline bool time_to_inject(int type) #define F2FS_MOUNT_FORCE_FG_GC 0x00004000 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 +#define F2FS_MOUNT_ADAPTIVE 0x00020000 +#define F2FS_MOUNT_LFS 0x00040000 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7a8d262bc488..b9d745ef5b08 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -878,9 +878,15 @@ static int __exchange_data_block(struct inode *inode, pgoff_t src, return full ? truncate_hole(inode, dst, dst + 1) : 0; if (do_replace) { - struct page *ipage = get_node_page(sbi, inode->i_ino); + struct page *ipage; struct node_info ni; + if (test_opt(sbi, LFS)) { + ret = -ENOTSUPP; + goto err_out; + } + + ipage = get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { ret = PTR_ERR(ipage); goto err_out; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b568b28c74f2..a39d84ab66b2 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -624,8 +624,12 @@ out: if (err) { bool invalidate = false; - if (discard_next_dnode(sbi, blkaddr)) + if (test_opt(sbi, LFS)) { + update_meta_page(sbi, NULL, blkaddr); invalidate = true; + } else if (discard_next_dnode(sbi, blkaddr)) { + invalidate = true; + } /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index eff046a792ad..4792f94089f7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -707,6 +707,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; + unsigned int secno, start_segno; mutex_lock(&dirty_i->seglist_lock); @@ -726,8 +727,22 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (!test_opt(sbi, DISCARD)) continue; - f2fs_issue_discard(sbi, START_BLOCK(sbi, start), + if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { + f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); + continue; + } +next: + secno = GET_SECNO(sbi, start); + start_segno = secno * sbi->segs_per_sec; + if (!IS_CURSEC(sbi, secno) && + !get_valid_blocks(sbi, start, sbi->segs_per_sec)) + f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), + sbi->segs_per_sec << sbi->log_blocks_per_seg); + + start = start_segno + sbi->segs_per_sec; + if (start < end) + goto next; } mutex_unlock(&dirty_i->seglist_lock); @@ -1221,6 +1236,9 @@ void allocate_new_segments(struct f2fs_sb_info *sbi) { int i; + if (test_opt(sbi, LFS)) + return; + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) __allocate_new_segments(sbi, i); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 890bb28d2082..d74cc330ae13 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -470,6 +470,10 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi) { int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + + if (test_opt(sbi, LFS)) + return false; + return free_sections(sbi) <= (node_secs + 2 * dent_secs + reserved_sections(sbi) + 1); } @@ -533,6 +537,9 @@ static inline bool need_inplace_update(struct inode *inode) if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) return false; + if (test_opt(sbi, LFS)) + return false; + if (policy & (0x1 << F2FS_IPU_FORCE)) return true; if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index dc66f1623e06..edc736de8ee9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -94,6 +94,7 @@ enum { Opt_noextent_cache, Opt_noinline_data, Opt_data_flush, + Opt_mode, Opt_fault_injection, Opt_lazytime, Opt_nolazytime, @@ -123,6 +124,7 @@ static match_table_t f2fs_tokens = { {Opt_noextent_cache, "noextent_cache"}, {Opt_noinline_data, "noinline_data"}, {Opt_data_flush, "data_flush"}, + {Opt_mode, "mode=%s"}, {Opt_fault_injection, "fault_injection=%u"}, {Opt_lazytime, "lazytime"}, {Opt_nolazytime, "nolazytime"}, @@ -506,6 +508,25 @@ static int parse_options(struct super_block *sb, char *options) case Opt_data_flush: set_opt(sbi, DATA_FLUSH); break; + case Opt_mode: + name = match_strdup(&args[0]); + + if (!name) + return -ENOMEM; + if (strlen(name) == 8 && + !strncmp(name, "adaptive", 8)) { + set_opt(sbi, ADAPTIVE); + clear_opt(sbi, LFS); + } else if (strlen(name) == 3 && + !strncmp(name, "lfs", 3)) { + clear_opt(sbi, ADAPTIVE); + set_opt(sbi, LFS); + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; case Opt_fault_injection: if (args->from && match_int(args, &arg)) return -EINVAL; @@ -870,6 +891,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",noextent_cache"); if (test_opt(sbi, DATA_FLUSH)) seq_puts(seq, ",data_flush"); + + seq_puts(seq, ",mode="); + if (test_opt(sbi, ADAPTIVE)) + seq_puts(seq, "adaptive"); + else if (test_opt(sbi, LFS)) + seq_puts(seq, "lfs"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; @@ -953,6 +980,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, EXTENT_CACHE); sbi->sb->s_flags |= MS_LAZYTIME; set_opt(sbi, FLUSH_MERGE); + set_opt(sbi, ADAPTIVE); #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); From c92737ceecaf24643cc58182afdf9b13074cfb0d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 Jun 2016 14:34:22 -0700 Subject: [PATCH 32/68] f2fs: fix deadlock in add_link failure mkdir sync_dirty_inode - init_inode_metadata - lock_page(node) - make_empty_dir - filemap_fdatawrite() - do_writepages - lock_page(data) - write_page(data) - lock_page(node) - f2fs_init_acl - error - truncate_inode_pages - lock_page(data) So, we don't need to truncate data pages in this error case, which will be done by f2fs_evict_inode. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index f6ab3c2f8145..486482468abb 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -450,9 +450,6 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, return page; put_error: - /* truncate empty dir pages */ - truncate_inode_pages(&inode->i_data, 0); - clear_nlink(inode); update_inode(inode, page); f2fs_put_page(page, 1); From 8be0fea9c09d5be1693e50d1439f14878837fa4f Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Sat, 4 Jun 2016 22:01:28 +0800 Subject: [PATCH 33/68] f2fs: find parent dentry correctly If dotdot directory is corrupted, its slot may be ocupied by another file. In this case, dentry[1] is not the parent directory. Rename and cross-rename will update the inode in dentry[1] incorrectly. This patch finds dotdot dentry by name. Signed-off-by: Sheng Yong [Jaegeuk Kim: remove wron bug_on] Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 17 ++--------------- fs/f2fs/f2fs.h | 1 - fs/f2fs/inline.c | 19 ------------------- 3 files changed, 2 insertions(+), 35 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 486482468abb..72a0207f56a1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -267,22 +267,9 @@ out: struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) { - struct page *page; - struct f2fs_dir_entry *de; - struct f2fs_dentry_block *dentry_blk; + struct qstr dotdot = QSTR_INIT("..", 2); - if (f2fs_has_inline_dentry(dir)) - return f2fs_parent_inline_dir(dir, p); - - page = get_lock_data_page(dir, 0, false); - if (IS_ERR(page)) - return NULL; - - dentry_blk = kmap(page); - de = &dentry_blk->dentry[1]; - *p = page; - unlock_page(page); - return de; + return f2fs_find_entry(dir, &dotdot, p); } ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 82acdec022ef..cee4a77a0f0a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2275,7 +2275,6 @@ int f2fs_write_inline_data(struct inode *, struct page *); bool recover_inline_data(struct inode *, struct page *); struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct fscrypt_name *, struct page **); -struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, nid_t, umode_t); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 669f92ffd111..e10e958250ff 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -306,25 +306,6 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, return de; } -struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir, - struct page **p) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct page *ipage; - struct f2fs_dir_entry *de; - struct f2fs_inline_dentry *dentry_blk; - - ipage = get_node_page(sbi, dir->i_ino); - if (IS_ERR(ipage)) - return NULL; - - dentry_blk = inline_data_addr(ipage); - de = &dentry_blk->dentry[1]; - *p = ipage; - unlock_page(ipage); - return de; -} - int make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage) { From 3e19886eda963f0c1438b2d1a40334d421cd09a2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Jun 2016 14:57:19 -0700 Subject: [PATCH 34/68] f2fs: report error for f2fs_parent_dir If there is no dentry, we can report its error correctly. Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 78efe00a3a2f..618829e8049c 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -662,10 +662,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; if (S_ISDIR(old_inode->i_mode)) { - err = -EIO; old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); - if (!old_dir_entry) + if (!old_dir_entry) { + err = PTR_ERR(old_dir_page); goto out_old; + } } if (flags & RENAME_WHITEOUT) { @@ -838,19 +839,21 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, /* prepare for updating ".." directory entry info later */ if (old_dir != new_dir) { if (S_ISDIR(old_inode->i_mode)) { - err = -EIO; old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); - if (!old_dir_entry) + if (!old_dir_entry) { + err = PTR_ERR(old_dir_page); goto out_new; + } } if (S_ISDIR(new_inode->i_mode)) { - err = -EIO; new_dir_entry = f2fs_parent_dir(new_inode, &new_dir_page); - if (!new_dir_entry) + if (!new_dir_entry) { + err = PTR_ERR(new_dir_page); goto out_old_dir; + } } } From 67c3758d2267de589ee9a8856fe637cce85993d9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Jun 2016 18:27:02 -0700 Subject: [PATCH 35/68] f2fs: call update_inode_page for orphan inodes Let's store orphan inode pages right away. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 +++--- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inode.c | 4 +--- fs/f2fs/namei.c | 4 ++-- fs/f2fs/super.c | 16 +--------------- 6 files changed, 9 insertions(+), 25 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 837e6bcad5ce..8534b98c0712 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -508,10 +508,11 @@ void release_orphan_inode(struct f2fs_sb_info *sbi) spin_unlock(&im->ino_lock); } -void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +void add_orphan_inode(struct inode *inode) { /* add new orphan ino entry into list */ - __add_ino_entry(sbi, ino, ORPHAN_INO); + __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO); + update_inode_page(inode); } void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -535,7 +536,6 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) } clear_nlink(inode); - mark_inode_dirty_sync(inode); /* truncate all the data during iput */ iput(inode); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 72a0207f56a1..7ba52a04e13a 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -666,7 +666,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode) up_write(&F2FS_I(inode)->i_sem); if (inode->i_nlink == 0) - add_orphan_inode(sbi, inode->i_ino); + add_orphan_inode(inode); else release_orphan_inode(sbi); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cee4a77a0f0a..bbcd4688f97b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2040,7 +2040,7 @@ bool exist_written_data(struct f2fs_sb_info *, nid_t, int); int f2fs_sync_inode_meta(struct f2fs_sb_info *); int acquire_orphan_inode(struct f2fs_sb_info *); void release_orphan_inode(struct f2fs_sb_info *); -void add_orphan_inode(struct f2fs_sb_info *, nid_t); +void add_orphan_inode(struct inode *); void remove_orphan_inode(struct f2fs_sb_info *, nid_t); int recover_orphan_inodes(struct f2fs_sb_info *); int get_valid_checkpoint(struct f2fs_sb_info *); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 63c432673c71..9221ca22720c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -393,8 +393,6 @@ no_delete: !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); out_clear: fscrypt_put_encryption_info(inode, NULL); - - f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); clear_inode(inode); } @@ -421,7 +419,7 @@ void handle_failed_inode(struct inode *inode) f2fs_msg(sbi->sb, KERN_WARNING, "Too many orphan inodes, run fsck to fix."); } else { - add_orphan_inode(sbi, inode->i_ino); + add_orphan_inode(inode); } alloc_nid_done(sbi, inode->i_ino); } else { diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 618829e8049c..4460400133cf 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -598,7 +598,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, * add this non-linked tmpfile to orphan list, in this way we could * remove all unused data of tmpfile after abnormal power-off. */ - add_orphan_inode(sbi, inode->i_ino); + add_orphan_inode(inode); alloc_nid_done(sbi, inode->i_ino); if (whiteout) { @@ -712,7 +712,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, up_write(&F2FS_I(new_inode)->i_sem); if (!new_inode->i_nlink) - add_orphan_inode(sbi, new_inode->i_ino); + add_orphan_inode(new_inode); else release_orphan_inode(sbi); } else { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index edc736de8ee9..41347c0829cd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -585,8 +585,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) static int f2fs_drop_inode(struct inode *inode) { - int ret; - /* * This is to avoid a deadlock condition like below. * writeback_single_inode(inode) @@ -622,19 +620,7 @@ static int f2fs_drop_inode(struct inode *inode) return 0; } - ret = generic_drop_inode(inode); - if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { - if (ret) - inode->i_state |= I_WILL_FREE; - spin_unlock(&inode->i_lock); - - update_inode_page(inode); - - spin_lock(&inode->i_lock); - if (ret) - inode->i_state &= ~I_WILL_FREE; - } - return ret; + return generic_drop_inode(inode); } /* From 52763a4b7a2112743745c5bbfe43fe6f54d4b39a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Jun 2016 09:47:48 -0700 Subject: [PATCH 36/68] f2fs: detect host-managed SMR by feature flag If mkfs.f2fs gives a feature flag for host-managed SMR, we can set mode=lfs by default. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- fs/f2fs/f2fs.h | 21 +++++++++++++++++++++ fs/f2fs/segment.c | 3 ++- fs/f2fs/super.c | 16 ++++++++++------ 4 files changed, 35 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 607ef4397330..08325280d03c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -102,7 +102,8 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, int rw, { if (!is_read_io(rw)) { atomic_inc(&sbi->nr_wb_bios); - if (current->plug && (type == DATA || type == NODE)) + if (f2fs_sb_mounted_hmsmr(sbi->sb) && + current->plug && (type == DATA || type == NODE)) blk_finish_plug(current->plug); } submit_bio(rw, bio); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bbcd4688f97b..b3aeb58a6285 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -133,6 +133,7 @@ struct f2fs_mount_info { }; #define F2FS_FEATURE_ENCRYPT 0x0001 +#define F2FS_FEATURE_HMSMR 0x0002 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -2334,6 +2335,26 @@ static inline int f2fs_sb_has_crypto(struct super_block *sb) return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); } +static inline int f2fs_sb_mounted_hmsmr(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_HMSMR); +} + +static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) +{ + clear_opt(sbi, ADAPTIVE); + clear_opt(sbi, LFS); + + switch (mt) { + case F2FS_MOUNT_ADAPTIVE: + set_opt(sbi, ADAPTIVE); + break; + case F2FS_MOUNT_LFS: + set_opt(sbi, LFS); + break; + } +} + static inline bool f2fs_may_encrypt(struct inode *inode) { #ifdef CONFIG_F2FS_FS_ENCRYPTION diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4792f94089f7..782975e791f1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2402,7 +2402,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); sm_info->rec_prefree_segments = sm_info->main_segments * DEF_RECLAIM_PREFREE_SEGMENTS / 100; - sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; + if (!test_opt(sbi, LFS)) + sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41347c0829cd..8c698e183d2b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -515,12 +515,10 @@ static int parse_options(struct super_block *sb, char *options) return -ENOMEM; if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { - set_opt(sbi, ADAPTIVE); - clear_opt(sbi, LFS); + set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); } else if (strlen(name) == 3 && !strncmp(name, "lfs", 3)) { - clear_opt(sbi, ADAPTIVE); - set_opt(sbi, LFS); + set_opt_mode(sbi, F2FS_MOUNT_LFS); } else { kfree(name); return -EINVAL; @@ -966,7 +964,12 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, EXTENT_CACHE); sbi->sb->s_flags |= MS_LAZYTIME; set_opt(sbi, FLUSH_MERGE); - set_opt(sbi, ADAPTIVE); + if (f2fs_sb_mounted_hmsmr(sbi->sb)) { + set_opt_mode(sbi, F2FS_MOUNT_LFS); + set_opt(sbi, DISCARD); + } else { + set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); + } #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); @@ -1615,6 +1618,8 @@ try_onemore: goto free_sbi; sb->s_fs_info = sbi; + sbi->raw_super = raw_super; + default_options(sbi); /* parse mount options */ options = kstrdup((const char *)data, GFP_KERNEL); @@ -1644,7 +1649,6 @@ try_onemore: memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); /* init f2fs-specific super block info */ - sbi->raw_super = raw_super; sbi->valid_super_block = valid_super_block; mutex_init(&sbi->gc_mutex); mutex_init(&sbi->cp_mutex); From ad4edb83143fdeef9e6fdd9daaa735b59476565b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 Jun 2016 16:41:49 -0700 Subject: [PATCH 37/68] f2fs: produce more nids and reduce readahead nats The readahead nat pages are more likely to be reclaimed quickly, so it'd better to gather more free nids in advance. And, let's keep some free nids as much as possible. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 9 ++++++--- fs/f2fs/node.h | 5 +++-- fs/f2fs/segment.c | 4 +++- fs/f2fs/shrinker.c | 5 +++-- 6 files changed, 18 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 8534b98c0712..2b43d4013e92 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -941,6 +941,8 @@ out: static void unblock_operations(struct f2fs_sb_info *sbi) { up_write(&sbi->node_write); + + build_free_nids(sbi); f2fs_unlock_all(sbi); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b3aeb58a6285..32884a7bdcc4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1965,6 +1965,7 @@ void move_node_page(struct page *, int); int fsync_node_pages(struct f2fs_sb_info *, struct inode *, struct writeback_control *, bool); int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); +void build_free_nids(struct f2fs_sb_info *); bool alloc_nid(struct f2fs_sb_info *, nid_t *); void alloc_nid_done(struct f2fs_sb_info *, nid_t); void alloc_nid_failed(struct f2fs_sb_info *, nid_t); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b448c8fec7fc..729fb1eb86ce 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1765,7 +1765,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, } } -static void build_free_nids(struct f2fs_sb_info *sbi) +void build_free_nids(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -1774,7 +1774,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) nid_t nid = nm_i->next_scan_nid; /* Enough entries */ - if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK) + if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK) return; /* readahead nat pages to be scanned */ @@ -1912,12 +1912,15 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) struct free_nid *i, *next; int nr = nr_shrink; + if (nm_i->fcnt <= MAX_FREE_NIDS) + return 0; + if (!mutex_trylock(&nm_i->build_lock)) return 0; spin_lock(&nm_i->free_nid_list_lock); list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { - if (nr_shrink <= 0 || nm_i->fcnt <= NAT_ENTRY_PER_BLOCK) + if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS) break; if (i->state == NID_ALLOC) continue; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 673ce926cf09..fc7684554b1a 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -15,9 +15,10 @@ #define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) /* # of pages to perform synchronous readahead before building free nids */ -#define FREE_NID_PAGES 4 +#define FREE_NID_PAGES 8 +#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) -#define DEF_RA_NID_PAGES 4 /* # of nid pages to be readaheaded */ +#define DEF_RA_NID_PAGES 0 /* # of nid pages to be readaheaded */ /* maximum readahead size for node during getting data blocks */ #define MAX_RA_NODE 128 diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 782975e791f1..6d16ecf9d29e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -371,7 +371,9 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); if (!available_free_memory(sbi, FREE_NIDS)) - try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES); + try_to_free_nids(sbi, MAX_FREE_NIDS); + else + build_free_nids(sbi); /* checkpoint is the only way to shrink partial cached entries */ if (!available_free_memory(sbi, NAT_ENTRIES) || diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 93606f281bf9..46c915425923 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -13,6 +13,7 @@ #include #include "f2fs.h" +#include "node.h" static LIST_HEAD(f2fs_list); static DEFINE_SPINLOCK(f2fs_list_lock); @@ -25,8 +26,8 @@ static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) { - if (NM_I(sbi)->fcnt > NAT_ENTRY_PER_BLOCK) - return NM_I(sbi)->fcnt - NAT_ENTRY_PER_BLOCK; + if (NM_I(sbi)->fcnt > MAX_FREE_NIDS) + return NM_I(sbi)->fcnt - MAX_FREE_NIDS; return 0; } From 2c237ebaa440b8c641c61cf66802521a917fc30c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 Jun 2016 16:44:11 -0700 Subject: [PATCH 38/68] f2fs: avoid writing node/metapages during writes Let's keep more node/meta pages in run time. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index d74cc330ae13..57d450fb4643 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -715,9 +715,9 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) if (type == DATA) return sbi->blocks_per_seg; else if (type == NODE) - return 3 * sbi->blocks_per_seg; + return 8 * sbi->blocks_per_seg; else if (type == META) - return MAX_BIO_BLOCKS(sbi); + return 8 * MAX_BIO_BLOCKS(sbi); else return 0; } @@ -736,7 +736,7 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, nr_to_write = wbc->nr_to_write; if (type == NODE) - desired = 3 * max_hw_blocks(sbi); + desired = 2 * max_hw_blocks(sbi); else desired = MAX_BIO_BLOCKS(sbi); From ac6f199984a667eb017897b8528f7687eac8fa45 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 Jun 2016 17:03:23 -0700 Subject: [PATCH 39/68] f2fs: avoid latency-critical readahead of node pages The f2fs_map_blocks is very related to the performance, so let's avoid any latency to read ahead node pages. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/file.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 08325280d03c..ba4963f51bee 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -652,7 +652,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, unsigned int maxblocks = map->m_len; struct dnode_of_data dn; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; + int mode = create ? ALLOC_NODE : LOOKUP_NODE; pgoff_t pgofs, end_offset, end; int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b9d745ef5b08..d07e7759f970 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -354,7 +354,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) { set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); + err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); if (err && err != -ENOENT) { goto fail; } else if (err == -ENOENT) { From 78682f79447998369a85f12b6437fa8fdbbdca50 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 3 Jul 2016 22:05:11 +0800 Subject: [PATCH 40/68] f2fs: fix to avoid reading out encrypted data in page cache For encrypted inode, if user overwrites data of the inode, f2fs will read encrypted data into page cache, and then do the decryption. However reader can race with overwriter, and it will see encrypted data which has not been decrypted by overwriter yet. Fix it by moving decrypting work to background and keep page non-uptodated until data is decrypted. Thread A Thread B - f2fs_file_write_iter - __generic_file_write_iter - generic_perform_write - f2fs_write_begin - f2fs_submit_page_bio - generic_file_read_iter - do_generic_file_read - lock_page_killable - unlock_page - copy_page_to_iter hit the encrypted data in updated page - lock_page - fscrypt_decrypt_page Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 90 ++++++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ba4963f51bee..53fae38009d3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -955,6 +955,37 @@ out: return ret; } +struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr, + unsigned nr_pages) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct fscrypt_ctx *ctx = NULL; + struct block_device *bdev = sbi->sb->s_bdev; + struct bio *bio; + + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + ctx = fscrypt_get_ctx(inode, GFP_NOFS); + if (IS_ERR(ctx)) + return ERR_CAST(ctx); + + /* wait the page to be moved by cleaning */ + f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); + } + + bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); + if (!bio) { + if (ctx) + fscrypt_release_ctx(ctx); + return ERR_PTR(-ENOMEM); + } + bio->bi_bdev = bdev; + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blkaddr); + bio->bi_end_io = f2fs_read_end_io; + bio->bi_private = ctx; + + return bio; +} + /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. @@ -973,7 +1004,6 @@ static int f2fs_mpage_readpages(struct address_space *mapping, sector_t last_block; sector_t last_block_in_file; sector_t block_nr; - struct block_device *bdev = inode->i_sb->s_bdev; struct f2fs_map_blocks map; map.m_pblk = 0; @@ -1048,31 +1078,9 @@ submit_and_realloc: bio = NULL; } if (bio == NULL) { - struct fscrypt_ctx *ctx = NULL; - - if (f2fs_encrypted_inode(inode) && - S_ISREG(inode->i_mode)) { - - ctx = fscrypt_get_ctx(inode, GFP_NOFS); - if (IS_ERR(ctx)) - goto set_error_page; - - /* wait the page to be moved by cleaning */ - f2fs_wait_on_encrypted_page_writeback( - F2FS_I_SB(inode), block_nr); - } - - bio = bio_alloc(GFP_KERNEL, - min_t(int, nr_pages, BIO_MAX_PAGES)); - if (!bio) { - if (ctx) - fscrypt_release_ctx(ctx); + bio = f2fs_grab_bio(inode, block_nr, nr_pages); + if (IS_ERR(bio)) goto set_error_page; - } - bio->bi_bdev = bdev; - bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr); - bio->bi_end_io = f2fs_read_end_io; - bio->bi_private = ctx; } if (bio_add_page(bio, page, blocksize, 0) < blocksize) @@ -1622,18 +1630,21 @@ repeat: if (blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); } else { - struct f2fs_io_info fio = { - .sbi = sbi, - .type = DATA, - .rw = READ_SYNC, - .old_blkaddr = blkaddr, - .new_blkaddr = blkaddr, - .page = page, - .encrypted_page = NULL, - }; - err = f2fs_submit_page_bio(&fio); - if (err) + struct bio *bio; + + bio = f2fs_grab_bio(inode, blkaddr, 1); + if (IS_ERR(bio)) { + err = PTR_ERR(bio); goto fail; + } + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + bio_put(bio); + err = -EFAULT; + goto fail; + } + + __submit_bio(sbi, READ_SYNC, bio, DATA); lock_page(page); if (unlikely(!PageUptodate(page))) { @@ -1644,13 +1655,6 @@ repeat: f2fs_put_page(page, 1); goto repeat; } - - /* avoid symlink page */ - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - err = fscrypt_decrypt_page(page); - if (err) - goto fail; - } } out_update: SetPageUptodate(page); From 1563ac75e7e45adcdc1271e6bb55fe27a23d4e4e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 3 Jul 2016 22:05:12 +0800 Subject: [PATCH 41/68] f2fs: fix to detect truncation prior rather than EIO during read In procedure of synchonized read, after sending out the read request, reader will try to lock the page for waiting device to finish the read jobs and unlock the page, but meanwhile, truncater will race with reader, so after reader get lock of the page, it should check page's mapping to detect whether someone has truncated the page in advance, then reader has the chance to do the retry if truncation was done, otherwise read can be failed due to previous condition check. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 16 ++++++++-------- fs/f2fs/gc.c | 4 ++-- fs/f2fs/node.c | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 53fae38009d3..3d93cf184114 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -500,14 +500,14 @@ repeat: /* wait for read completion */ lock_page(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } return page; } @@ -1647,14 +1647,14 @@ repeat: __submit_bio(sbi, READ_SYNC, bio, DATA); lock_page(page); - if (unlikely(!PageUptodate(page))) { - err = -EIO; - goto fail; - } if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } + if (unlikely(!PageUptodate(page))) { + err = -EIO; + goto fail; + } } out_update: SetPageUptodate(page); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e1d274cdecb8..c9602d0dc57a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -593,11 +593,11 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) /* write page */ lock_page(fio.encrypted_page); - if (unlikely(!PageUptodate(fio.encrypted_page))) { + if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) { err = -EIO; goto put_page_out; } - if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) { + if (unlikely(!PageUptodate(fio.encrypted_page))) { err = -EIO; goto put_page_out; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 729fb1eb86ce..69171ce9b4b1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1146,13 +1146,13 @@ repeat: lock_page(page); - if (unlikely(!PageUptodate(page))) - goto out_err; - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { f2fs_put_page(page, 1); goto repeat; } + + if (unlikely(!PageUptodate(page))) + goto out_err; page_hit: if(unlikely(nid != nid_of_node(page))) { f2fs_bug_on(sbi, 1); From 72e1c797b57da42ce8f75c5636720f40c4f607a2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 3 Jul 2016 22:05:13 +0800 Subject: [PATCH 42/68] f2fs: fix to redirty page if fail to gc data page If we fail to move data page during foreground GC, we should give another chance to writeback that page which was set dirty previously by writer. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c9602d0dc57a..c61213785914 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -653,12 +653,23 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type) .page = page, .encrypted_page = NULL, }; + bool is_dirty = PageDirty(page); + int err; + +retry: set_page_dirty(page); f2fs_wait_on_page_writeback(page, DATA, true); if (clear_page_dirty_for_io(page)) inode_dec_dirty_pages(inode); + set_cold_data(page); - do_write_data_page(&fio); + + err = do_write_data_page(&fio); + if (err == -ENOMEM && is_dirty) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + clear_cold_data(page); } out: From 64058be9c8e3579d7055e0b01d2bd9b294db3998 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 3 Jul 2016 22:05:14 +0800 Subject: [PATCH 43/68] f2fs: add nodiscard mount option This patch adds 'nodiscard' mount option. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 4 +++- fs/f2fs/super.c | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 3a5ce24021d9..ecd808088362 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -109,7 +109,9 @@ background_gc=%s Turn on/off cleaning operations, namely garbage disable_roll_forward Disable the roll-forward recovery routine norecovery Disable the roll-forward recovery routine, mounted read- only (i.e., -o ro,disable_roll_forward) -discard Issue discard/TRIM commands when a segment is cleaned. +discard/nodiscard Enable/disable real-time discard in f2fs, if discard is + enabled, f2fs will issue discard/TRIM commands when a + segment is cleaned. no_heap Disable heap-style segment allocation which finds free segments for data from the beginning of main area, while for node from the end of main area. diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8c698e183d2b..e0a975dc6fc6 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -76,6 +76,7 @@ enum { Opt_disable_roll_forward, Opt_norecovery, Opt_discard, + Opt_nodiscard, Opt_noheap, Opt_user_xattr, Opt_nouser_xattr, @@ -106,6 +107,7 @@ static match_table_t f2fs_tokens = { {Opt_disable_roll_forward, "disable_roll_forward"}, {Opt_norecovery, "norecovery"}, {Opt_discard, "discard"}, + {Opt_nodiscard, "nodiscard"}, {Opt_noheap, "no_heap"}, {Opt_user_xattr, "user_xattr"}, {Opt_nouser_xattr, "nouser_xattr"}, @@ -426,6 +428,8 @@ static int parse_options(struct super_block *sb, char *options) "the device does not support discard"); } break; + case Opt_nodiscard: + clear_opt(sbi, DISCARD); case Opt_noheap: set_opt(sbi, NOHEAP); break; From a0995af69554cecd55c8d2b8c4e4418b84737fd0 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 28 Jun 2016 07:27:59 +0800 Subject: [PATCH 44/68] f2fs: remove unnecessary goto statement When base_addr is NULL, there is no need to call kzfree, it should return -ENOMEM directly. Additionally, it is better to initialize variable 'error' with 0. Signed-off-by: Tiezhu Yang Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 8c0a3b36a917..2e8cb873c31e 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -447,7 +447,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, int found, newsize; size_t len; __u32 new_hsize; - int error = -ENOMEM; + int error = 0; if (name == NULL) return -EINVAL; @@ -465,7 +465,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, base_addr = read_all_xattrs(inode, ipage); if (!base_addr) - goto exit; + return -ENOMEM; /* find entry with wanted name. */ here = __find_xattr(base_addr, index, len, name); From fe76b796fc5194cc3d57265002e3a748566d073f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Jun 2016 18:40:10 -0700 Subject: [PATCH 45/68] f2fs: introduce f2fs_set_page_dirty_nobuffer This patch adds f2fs_set_page_dirty_nobuffer() copied from __set_page_dirty_buffer. When appending 4KB blocks in f2fs on pmem with multiple cores, this improves the overall performance. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/data.c | 33 ++++++++++++++++++++++++++++++++- fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 2 +- 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2b43d4013e92..2755ef730a41 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -366,7 +366,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) SetPageUptodate(page); if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); + f2fs_set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); SetPagePrivate(page); f2fs_trace_pid(page); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3d93cf184114..4a2e97dad9e9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include "f2fs.h" @@ -1775,6 +1777,35 @@ int f2fs_release_page(struct page *page, gfp_t wait) return 1; } +/* + * This was copied from __set_page_dirty_buffers which gives higher performance + * in very high speed storages. (e.g., pmem) + */ +void f2fs_set_page_dirty_nobuffers(struct page *page) +{ + struct address_space *mapping = page->mapping; + unsigned long flags; + + if (unlikely(!mapping)) + return; + + spin_lock(&mapping->private_lock); + lock_page_memcg(page); + SetPageDirty(page); + spin_unlock(&mapping->private_lock); + + spin_lock_irqsave(&mapping->tree_lock, flags); + WARN_ON_ONCE(!PageUptodate(page)); + account_page_dirtied(page, mapping); + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + unlock_page_memcg(page); + + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + return; +} + static int f2fs_set_data_page_dirty(struct page *page) { struct address_space *mapping = page->mapping; @@ -1797,7 +1828,7 @@ static int f2fs_set_data_page_dirty(struct page *page) } if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); + f2fs_set_page_dirty_nobuffers(page); update_dirty_page(inode, page); return 1; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 32884a7bdcc4..096f16d343a8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2077,6 +2077,7 @@ struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct f2fs_io_info *); int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); +void f2fs_set_page_dirty_nobuffers(struct page *); void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); int f2fs_release_page(struct page *, gfp_t); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 69171ce9b4b1..db73f3c823dc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1646,7 +1646,7 @@ static int f2fs_set_node_page_dirty(struct page *page) SetPageUptodate(page); if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); + f2fs_set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); SetPagePrivate(page); f2fs_trace_pid(page); From 237c0790e54020d522b8fd23097e8dcafb4e331d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Jun 2016 18:49:15 -0700 Subject: [PATCH 46/68] f2fs: call SetPageUptodate if needed SetPageUptodate() issues memory barrier, resulting in performance degrdation. Let's avoid that. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 ++++-- fs/f2fs/data.c | 18 ++++++++++++------ fs/f2fs/file.c | 3 ++- fs/f2fs/inline.c | 9 ++++++--- fs/f2fs/node.c | 9 ++++++--- 5 files changed, 30 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2755ef730a41..8ea895389ae4 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -48,7 +48,8 @@ repeat: goto repeat; } f2fs_wait_on_page_writeback(page, META, true); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); return page; } @@ -364,7 +365,8 @@ static int f2fs_set_meta_page_dirty(struct page *page) { trace_f2fs_set_page_dirty(page, META); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); if (!PageDirty(page)) { f2fs_set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4a2e97dad9e9..b6fd5bd05e41 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -47,7 +47,8 @@ static void f2fs_read_end_io(struct bio *bio) struct page *page = bvec->bv_page; if (!bio->bi_error) { - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); } else { ClearPageUptodate(page); SetPageError(page); @@ -443,7 +444,8 @@ got_it: */ if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); unlock_page(page); return page; } @@ -554,7 +556,8 @@ struct page *get_new_data_page(struct inode *inode, if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); } else { f2fs_put_page(page, 1); @@ -1065,7 +1068,8 @@ got_it: } } else { zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); unlock_page(page); goto next_page; } @@ -1659,7 +1663,8 @@ repeat: } } out_update: - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); out_clear: clear_cold_data(page); return 0; @@ -1813,7 +1818,8 @@ static int f2fs_set_data_page_dirty(struct page *page) trace_f2fs_set_page_dirty(page, DATA); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); if (f2fs_is_atomic_file(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d07e7759f970..2b777a42bf43 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -81,7 +81,8 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, zero_user_segment(page, offset, PAGE_SIZE); } set_page_dirty(page); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); trace_f2fs_vm_page_mkwrite(page, DATA); mapped: diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e10e958250ff..2cd0edcc4ebc 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -59,7 +59,8 @@ void read_inline_data(struct page *page, struct page *ipage) memcpy(dst_addr, src_addr, MAX_INLINE_DATA); flush_dcache_page(page); kunmap_atomic(dst_addr); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); } bool truncate_inline_inode(struct page *ipage, u64 from) @@ -97,7 +98,8 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) else read_inline_data(page, ipage); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); f2fs_put_page(ipage, 1); unlock_page(page); return 0; @@ -370,7 +372,8 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, NR_INLINE_DENTRY * F2FS_SLOT_LEN); kunmap_atomic(dentry_blk); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); set_page_dirty(page); /* clear inline dir and flag after data writeback */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index db73f3c823dc..ca1bb3cc6c32 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1045,7 +1045,8 @@ struct page *new_node_page(struct dnode_of_data *dn, f2fs_wait_on_page_writeback(page, NODE, true); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(dn->inode, page); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); if (set_page_dirty(page)) dn->node_changed = true; @@ -1644,7 +1645,8 @@ static int f2fs_set_node_page_dirty(struct page *page) { trace_f2fs_set_page_dirty(page, NODE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); if (!PageDirty(page)) { f2fs_set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); @@ -2015,7 +2017,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* Should not use this inode from free nid list */ remove_free_nid(NM_I(sbi), ino); - SetPageUptodate(ipage); + if (!PageUptodate(ipage)) + SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); src = F2FS_INODE(page); From 2555a2d55822c9b2e1a933933dedd5d172067cea Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Jun 2016 19:02:06 -0700 Subject: [PATCH 47/68] f2fs: shrink critical region in spin_lock This patch shrinks the critical region in spin_lock. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 096f16d343a8..d5892f56c44d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1136,30 +1136,23 @@ static inline void f2fs_i_blocks_write(struct inode *, blkcnt_t, bool); static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count) { - block_t valid_block_count; - - spin_lock(&sbi->stat_lock); #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(FAULT_BLOCK)) { - spin_unlock(&sbi->stat_lock); + if (time_to_inject(FAULT_BLOCK)) return false; - } #endif - valid_block_count = - sbi->total_valid_block_count + (block_t)(*count); - if (unlikely(valid_block_count > sbi->user_block_count)) { - *count = sbi->user_block_count - sbi->total_valid_block_count; + spin_lock(&sbi->stat_lock); + sbi->total_valid_block_count += (block_t)(*count); + if (unlikely(sbi->total_valid_block_count > sbi->user_block_count)) { + *count -= sbi->total_valid_block_count - sbi->user_block_count; + sbi->total_valid_block_count = sbi->user_block_count; if (!*count) { spin_unlock(&sbi->stat_lock); return false; } } - /* *count can be recalculated */ - f2fs_i_blocks_write(inode, *count, true); - sbi->total_valid_block_count = - sbi->total_valid_block_count + (block_t)(*count); spin_unlock(&sbi->stat_lock); + f2fs_i_blocks_write(inode, *count, true); percpu_counter_add(&sbi->alloc_valid_block_count, (*count)); return true; } @@ -1171,9 +1164,9 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); f2fs_bug_on(sbi, inode->i_blocks < count); - f2fs_i_blocks_write(inode, count, false); sbi->total_valid_block_count -= (block_t)count; spin_unlock(&sbi->stat_lock); + f2fs_i_blocks_write(inode, count, false); } static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) From 3bdad3c7ee72a76ec87be3477eb958ed7ca304fc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Jun 2016 19:04:16 -0700 Subject: [PATCH 48/68] f2fs: skip to check the block address of node page If the node page is up-to-date, it should be alive. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ca1bb3cc6c32..9d994b97e61f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1080,6 +1080,9 @@ static int read_node_page(struct page *page, int rw) .encrypted_page = NULL, }; + if (PageUptodate(page)) + return LOCKED_PAGE; + get_node_info(sbi, page->index, &ni); if (unlikely(ni.blk_addr == NULL_ADDR)) { @@ -1087,9 +1090,6 @@ static int read_node_page(struct page *page, int rw) return -ENOENT; } - if (PageUptodate(page)) - return LOCKED_PAGE; - fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr; return f2fs_submit_page_bio(&fio); } From ec795418c41850056feb956534edf059dc1155d4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Jun 2016 19:59:11 -0700 Subject: [PATCH 49/68] f2fs: use percpu_rw_semaphore This patch replaces rw_semaphore with percpu_rw_semaphore for: sbi->cp_rwsem nm_i->nat_tree_lock Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 ++++++------ fs/f2fs/node.c | 47 ++++++++++++++++++++++++----------------------- fs/f2fs/super.c | 6 +++++- 3 files changed, 35 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d5892f56c44d..88fa13909b9c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -527,7 +527,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ + struct percpu_rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ @@ -775,7 +775,7 @@ struct f2fs_sb_info { struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ struct inode *meta_inode; /* cache meta blocks */ struct mutex cp_mutex; /* checkpoint procedure lock */ - struct rw_semaphore cp_rwsem; /* blocking FS operations */ + struct percpu_rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ @@ -1062,22 +1062,22 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { - down_read(&sbi->cp_rwsem); + percpu_down_read(&sbi->cp_rwsem); } static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { - up_read(&sbi->cp_rwsem); + percpu_up_read(&sbi->cp_rwsem); } static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) { - down_write(&sbi->cp_rwsem); + percpu_down_write(&sbi->cp_rwsem); } static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) { - up_write(&sbi->cp_rwsem); + percpu_up_write(&sbi->cp_rwsem); } static inline int __get_cp_reason(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9d994b97e61f..b841c439adb7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -206,14 +206,14 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool need = false; - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) need = true; } - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); return need; } @@ -223,11 +223,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -237,13 +237,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); return need_update; } @@ -284,7 +284,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - down_write(&nm_i->nat_tree_lock); + percpu_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); @@ -334,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); } int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -342,8 +342,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) struct f2fs_nm_info *nm_i = NM_I(sbi); int nr = nr_shrink; - if (!down_write_trylock(&nm_i->nat_tree_lock)) - return 0; + percpu_down_write(&nm_i->nat_tree_lock); while (nr_shrink && !list_empty(&nm_i->nat_entries)) { struct nat_entry *ne; @@ -352,7 +351,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) __del_from_nat_cache(nm_i, ne); nr_shrink--; } - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); return nr - nr_shrink; } @@ -374,13 +373,13 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ni->nid = nid; /* Check nat cache */ - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); return; } @@ -404,11 +403,11 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) node_info_from_raw_nat(ni, &ne); f2fs_put_page(page, 1); cache: - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); /* cache nat entry */ - down_write(&nm_i->nat_tree_lock); + percpu_down_write(&nm_i->nat_tree_lock); cache_nat_entry(sbi, nid, &ne); - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); } /* @@ -1783,7 +1782,7 @@ void build_free_nids(struct f2fs_sb_info *sbi) ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); while (1) { struct page *page = get_current_nat_page(sbi, nid); @@ -1815,7 +1814,7 @@ void build_free_nids(struct f2fs_sb_info *sbi) remove_free_nid(nm_i, nid); } up_read(&curseg->journal_rwsem); - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); @@ -2204,7 +2203,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!nm_i->dirty_nat_cnt) return; - down_write(&nm_i->nat_tree_lock); + percpu_down_write(&nm_i->nat_tree_lock); /* * if there are no enough space in journal to store dirty nat @@ -2227,7 +2226,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) list_for_each_entry_safe(set, tmp, &sets, set_list) __flush_nat_entry_set(sbi, set); - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); } @@ -2263,7 +2262,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi) mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->free_nid_list_lock); - init_rwsem(&nm_i->nat_tree_lock); + if (percpu_init_rwsem(&nm_i->nat_tree_lock)) + return -ENOMEM; nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -2320,7 +2320,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ - down_write(&nm_i->nat_tree_lock); + percpu_down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -2345,8 +2345,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) kmem_cache_free(nat_entry_set_slab, setvec[idx]); } } - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); + percpu_free_rwsem(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); sbi->nm_info = NULL; kfree(nm_i); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e0a975dc6fc6..2bac9171d2c3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -695,6 +695,8 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi) percpu_counter_destroy(&sbi->nr_pages[i]); percpu_counter_destroy(&sbi->alloc_valid_block_count); percpu_counter_destroy(&sbi->total_valid_inode_count); + + percpu_free_rwsem(&sbi->cp_rwsem); } static void f2fs_put_super(struct super_block *sb) @@ -1471,6 +1473,9 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) { int i, err; + if (percpu_init_rwsem(&sbi->cp_rwsem)) + return -ENOMEM; + for (i = 0; i < NR_COUNT_TYPE; i++) { err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL); if (err) @@ -1671,7 +1676,6 @@ try_onemore: sbi->write_io[i].bio = NULL; } - init_rwsem(&sbi->cp_rwsem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); From 3e6d0b4d9c1cd3bbe5bacbf9d6de682be2bce8e4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 Jul 2016 14:13:07 +0800 Subject: [PATCH 50/68] f2fs: fix incorrect f_bfree calculation in ->statfs As manual described, f_bfree indicates total free blocks in fs, in f2fs, it includes two parts: visible free blocks and over-provision blocks. This patch corrrects the calculation. fsblkcnt_t f_bfree; /* free blocks in fs */ Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2bac9171d2c3..edd1b356d667 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -816,7 +816,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bsize = sbi->blocksize; buf->f_blocks = total_count - start_count; - buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; + buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count; buf->f_bavail = user_block_count - valid_user_blocks(sbi); buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; From c7b41e161368388487238d71986a65290f83d74a Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 7 Jul 2016 12:13:33 +0800 Subject: [PATCH 51/68] f2fs: avoid mismatching block range for discard This patch skip discard block range smaller than trim_minlen, and can not be merged by neighbour Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6d16ecf9d29e..9e13db05e3f0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -672,6 +672,10 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) break; end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); + if (force && start && end != max_blocks + && (end - start) < cpc->trim_minlen) + continue; + __add_discard_entry(sbi, cpc, se, start, end); } } From c24a0fd655431e9f14c8a8754d0a6cc247f9e9e5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 7 Jul 2016 22:46:55 +0800 Subject: [PATCH 52/68] f2fs: fix to avoid redundant discard during fstrim With below test steps, f2fs will issue redundant discard when doing fstrim, the reason is that we issue discards for both prefree segments and consecutive freed region user wants to trim, part regions they covered are overlapped, here, we change to do not to issue any discards for prefree segments in trimmed range. 1. mount -t f2fs -o discard /dev/zram0 /mnt/f2fs 2. fstrim -o 0 -l 3221225472 -m 2097152 -v /mnt/f2fs/ 3. dd if=/dev/zero of=/mnt/f2fs/a bs=2M count=1 4. dd if=/dev/zero of=/mnt/f2fs/b bs=1M count=1 5. sync 6. rm /mnt/f2fs/a /mnt/f2fs/b 7. fstrim -o 0 -l 3221225472 -m 2097152 -v /mnt/f2fs/ Before: <...>-5428 [001] ...1 9511.052125: f2fs_issue_discard: dev = (251,0), blkstart = 0x2200, blklen = 0x200 <...>-5428 [001] ...1 9511.052787: f2fs_issue_discard: dev = (251,0), blkstart = 0x2200, blklen = 0x300 After: <...>-6764 [000] ...1 9720.382504: f2fs_issue_discard: dev = (251,0), blkstart = 0x2200, blklen = 0x300 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9e13db05e3f0..08f6c0be20cf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -714,6 +714,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; unsigned int secno, start_segno; + bool force = (cpc->reason == CP_DISCARD); mutex_lock(&dirty_i->seglist_lock); @@ -730,7 +731,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) dirty_i->nr_dirty[PRE] -= end - start; - if (!test_opt(sbi, DISCARD)) + if (force || !test_opt(sbi, DISCARD)) continue; if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { @@ -754,7 +755,7 @@ next: /* send small discards */ list_for_each_entry_safe(entry, this, head, list) { - if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen) + if (force && entry->len < cpc->trim_minlen) goto skip; f2fs_issue_discard(sbi, entry->blkaddr, entry->len); cpc->trimmed += entry->len; From a2ee0a300344a6da76186129b078113354fe13d2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Jul 2016 10:06:23 -0700 Subject: [PATCH 53/68] f2fs: move i_size_write in f2fs_write_end We don't need to do i_size_write under page lock. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b6fd5bd05e41..20b30162e7b4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1685,11 +1685,11 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); set_page_dirty(page); + f2fs_put_page(page, 1); if (pos + copied > i_size_read(inode)) f2fs_i_size_write(inode, pos + copied); - f2fs_put_page(page, 1); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return copied; } From b56ab837a06f3042a54b17a2a4ab3300eb03ecf5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Jun 2016 19:09:37 -0700 Subject: [PATCH 54/68] f2fs: avoid mark_inode_dirty Let's check inode's dirtiness before calling mark_inode_dirty. Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 2 +- fs/f2fs/dir.c | 6 ++-- fs/f2fs/extent_cache.c | 2 +- fs/f2fs/f2fs.h | 24 ++++++++------- fs/f2fs/file.c | 8 ++--- fs/f2fs/inline.c | 2 +- fs/f2fs/inode.c | 9 +++++- fs/f2fs/namei.c | 6 ++-- fs/f2fs/super.c | 67 ++++++++++++++++++++++++------------------ fs/f2fs/xattr.c | 4 +-- 10 files changed, 74 insertions(+), 56 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 6a414e75e705..4dcc9e28dc5c 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -384,7 +384,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, if (error) return error; - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); if (default_acl) { error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 7ba52a04e13a..db4022f9c5b1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -300,7 +300,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; - mark_inode_dirty_sync(dir); + f2fs_mark_inode_dirty_sync(dir); f2fs_put_page(page, 1); } @@ -452,7 +452,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, clear_inode_flag(inode, FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = CURRENT_TIME; - mark_inode_dirty_sync(dir); + f2fs_mark_inode_dirty_sync(dir); if (F2FS_I(dir)->i_current_depth != current_depth) f2fs_i_depth_write(dir, current_depth); @@ -704,7 +704,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, set_page_dirty(page); dir->i_ctime = dir->i_mtime = CURRENT_TIME; - mark_inode_dirty_sync(dir); + f2fs_mark_inode_dirty_sync(dir); if (inode) f2fs_drop_nlink(dir, inode); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index e858869d76cb..5b4b6d426ebb 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -172,7 +172,7 @@ static void __drop_largest_extent(struct inode *inode, if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) { largest->len = 0; - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); } } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 88fa13909b9c..0e46dd0a09d3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -506,12 +506,13 @@ static inline bool __is_front_mergeable(struct extent_info *cur, return __is_extent_mergeable(cur, front); } +extern void f2fs_mark_inode_dirty_sync(struct inode *); static inline void __try_update_largest_extent(struct inode *inode, struct extent_tree *et, struct extent_node *en) { if (en->ei.len > et->largest.len) { et->largest = en->ei; - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); } } @@ -1551,7 +1552,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, return; case FI_DATA_EXIST: case FI_INLINE_DOTS: - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); } } @@ -1578,7 +1579,7 @@ static inline void set_acl_inode(struct inode *inode, umode_t mode) { F2FS_I(inode)->i_acl_mode = mode; set_inode_flag(inode, FI_ACL_MODE); - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); } static inline void f2fs_i_links_write(struct inode *inode, bool inc) @@ -1587,7 +1588,7 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc) inc_nlink(inode); else drop_nlink(inode); - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); } static inline void f2fs_i_blocks_write(struct inode *inode, @@ -1598,7 +1599,7 @@ static inline void f2fs_i_blocks_write(struct inode *inode, inode->i_blocks = add ? inode->i_blocks + diff : inode->i_blocks - diff; - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); if (clean || recover) set_inode_flag(inode, FI_AUTO_RECOVER); } @@ -1612,7 +1613,7 @@ static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) return; i_size_write(inode, i_size); - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); if (clean || recover) set_inode_flag(inode, FI_AUTO_RECOVER); } @@ -1627,19 +1628,19 @@ static inline bool f2fs_skip_inode_update(struct inode *inode) static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) { F2FS_I(inode)->i_current_depth = depth; - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); } static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid) { F2FS_I(inode)->i_xattr_nid = xnid; - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); } static inline void f2fs_i_pino_write(struct inode *inode, nid_t pino) { F2FS_I(inode)->i_pino = pino; - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); } static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) @@ -1767,13 +1768,13 @@ static inline int is_file(struct inode *inode, int type) static inline void set_file(struct inode *inode, int type) { F2FS_I(inode)->i_advise |= type; - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); } static inline void clear_file(struct inode *inode, int type) { F2FS_I(inode)->i_advise &= ~type; - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); } static inline int f2fs_readonly(struct super_block *sb) @@ -1920,6 +1921,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) /* * super.c */ +int f2fs_inode_dirtied(struct inode *); void f2fs_inode_synced(struct inode *); int f2fs_commit_super(struct f2fs_sb_info *, bool); int f2fs_sync_fs(struct super_block *, int); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2b777a42bf43..1ec0197a3c9b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -263,7 +263,7 @@ sync_nodes: } if (need_inode_block_update(sbi, ino)) { - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); f2fs_write_inode(inode, NULL); goto sync_nodes; } @@ -631,7 +631,7 @@ int f2fs_truncate(struct inode *inode) return err; inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); return 0; } @@ -721,7 +721,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } } - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); return err; } @@ -1276,7 +1276,7 @@ static long f2fs_fallocate(struct file *file, int mode, if (!ret) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 2cd0edcc4ebc..d411ab6c7483 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -569,7 +569,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_put_page(page, 1); dir->i_ctime = dir->i_mtime = CURRENT_TIME; - mark_inode_dirty_sync(dir); + f2fs_mark_inode_dirty_sync(dir); if (inode) f2fs_drop_nlink(dir, inode); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9221ca22720c..9ac5efc15347 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -18,6 +18,13 @@ #include +void f2fs_mark_inode_dirty_sync(struct inode *inode) +{ + if (f2fs_inode_dirtied(inode)) + return; + mark_inode_dirty_sync(inode); +} + void f2fs_set_inode_flags(struct inode *inode) { unsigned int flags = F2FS_I(inode)->i_flags; @@ -35,7 +42,7 @@ void f2fs_set_inode_flags(struct inode *inode) new_fl |= S_DIRSYNC; inode_set_flags(inode, new_fl, S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 4460400133cf..c9ba6d7a6a06 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -757,7 +757,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = CURRENT_TIME; - mark_inode_dirty_sync(old_inode); + f2fs_mark_inode_dirty_sync(old_inode); f2fs_delete_entry(old_entry, old_page, old_dir, NULL); @@ -909,7 +909,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_i_links_write(old_dir, old_nlink > 0); up_write(&F2FS_I(old_dir)->i_sem); } - mark_inode_dirty_sync(old_dir); + f2fs_mark_inode_dirty_sync(old_dir); /* update directory entry info of new dir inode */ f2fs_set_link(new_dir, new_entry, new_page, old_inode); @@ -924,7 +924,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_i_links_write(new_dir, new_nlink > 0); up_write(&F2FS_I(new_dir)->i_sem); } - mark_inode_dirty_sync(new_dir); + f2fs_mark_inode_dirty_sync(new_dir); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index edd1b356d667..451dfb4041e8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -625,6 +625,43 @@ static int f2fs_drop_inode(struct inode *inode) return generic_drop_inode(inode); } +int f2fs_inode_dirtied(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + spin_lock(&sbi->inode_lock[DIRTY_META]); + if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { + spin_unlock(&sbi->inode_lock[DIRTY_META]); + return 1; + } + + set_inode_flag(inode, FI_DIRTY_INODE); + list_add_tail(&F2FS_I(inode)->gdirty_list, + &sbi->inode_list[DIRTY_META]); + inc_page_count(sbi, F2FS_DIRTY_IMETA); + stat_inc_dirty_inode(sbi, DIRTY_META); + spin_unlock(&sbi->inode_lock[DIRTY_META]); + + return 0; +} + +void f2fs_inode_synced(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + spin_lock(&sbi->inode_lock[DIRTY_META]); + if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) { + spin_unlock(&sbi->inode_lock[DIRTY_META]); + return; + } + list_del_init(&F2FS_I(inode)->gdirty_list); + clear_inode_flag(inode, FI_DIRTY_INODE); + clear_inode_flag(inode, FI_AUTO_RECOVER); + dec_page_count(sbi, F2FS_DIRTY_IMETA); + stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META); + spin_unlock(&sbi->inode_lock[DIRTY_META]); +} + /* * f2fs_dirty_inode() is called from __mark_inode_dirty() * @@ -644,35 +681,7 @@ static void f2fs_dirty_inode(struct inode *inode, int flags) if (is_inode_flag_set(inode, FI_AUTO_RECOVER)) clear_inode_flag(inode, FI_AUTO_RECOVER); - spin_lock(&sbi->inode_lock[DIRTY_META]); - if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { - spin_unlock(&sbi->inode_lock[DIRTY_META]); - return; - } - - set_inode_flag(inode, FI_DIRTY_INODE); - list_add_tail(&F2FS_I(inode)->gdirty_list, - &sbi->inode_list[DIRTY_META]); - inc_page_count(sbi, F2FS_DIRTY_IMETA); - stat_inc_dirty_inode(sbi, DIRTY_META); - spin_unlock(&sbi->inode_lock[DIRTY_META]); -} - -void f2fs_inode_synced(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - - spin_lock(&sbi->inode_lock[DIRTY_META]); - if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) { - spin_unlock(&sbi->inode_lock[DIRTY_META]); - return; - } - list_del_init(&F2FS_I(inode)->gdirty_list); - clear_inode_flag(inode, FI_DIRTY_INODE); - clear_inode_flag(inode, FI_AUTO_RECOVER); - dec_page_count(sbi, F2FS_DIRTY_IMETA); - stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META); - spin_unlock(&sbi->inode_lock[DIRTY_META]); + f2fs_inode_dirtied(inode); } static void f2fs_i_callback(struct rcu_head *head) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 2e8cb873c31e..c8898b5148eb 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -106,7 +106,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler, return -EINVAL; F2FS_I(inode)->i_advise |= *(char *)value; - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); return 0; } @@ -547,7 +547,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); exit: kzfree(base_addr); return error; From 1d353eb7e43853335a3c535c204c4e86f82eaf66 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 Jul 2016 09:38:48 -0700 Subject: [PATCH 55/68] f2fs: fix ERR_PTR returned by bio This is to fix wrong error pointer handling flow reported by Dan. Reported-by: Dan Carpenter Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 20b30162e7b4..650099597dd2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1085,8 +1085,10 @@ submit_and_realloc: } if (bio == NULL) { bio = f2fs_grab_bio(inode, block_nr, nr_pages); - if (IS_ERR(bio)) + if (IS_ERR(bio)) { + bio = NULL; goto set_error_page; + } } if (bio_add_page(bio, page, blocksize, 0) < blocksize) From 0a2aa8fbb9693020b822ac7a23755591554eaea5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 Jul 2016 17:42:21 -0700 Subject: [PATCH 56/68] f2fs: refactor __exchange_data_block for speed up This reduces the elapsed time to do xfstests/generic/017. Before: 715 s After: 458 s Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 250 ++++++++++++++++++++++++++++++++++--------------- fs/f2fs/node.c | 1 + 2 files changed, 178 insertions(+), 73 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1ec0197a3c9b..b1dc972407e5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -850,85 +850,189 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) return ret; } -static int __exchange_data_block(struct inode *inode, pgoff_t src, - pgoff_t dst, bool full) +static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, + int *do_replace, pgoff_t off, pgoff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - block_t new_addr; - bool do_replace = false; - int ret; + int ret, done, i; +next_dnode: set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, src, LOOKUP_NODE_RA); + ret = get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); if (ret && ret != -ENOENT) { return ret; } else if (ret == -ENOENT) { - new_addr = NULL_ADDR; - } else { - new_addr = dn.data_blkaddr; - if (!is_checkpointed_data(sbi, new_addr)) { + if (dn.max_level == 0) + return -ENOENT; + done = min((pgoff_t)ADDRS_PER_BLOCK - dn.ofs_in_node, len); + blkaddr += done; + do_replace += done; + goto next; + } + + done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) - + dn.ofs_in_node, len); + for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { + *blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + if (!is_checkpointed_data(sbi, *blkaddr)) { + + if (test_opt(sbi, LFS)) { + f2fs_put_dnode(&dn); + return -ENOTSUPP; + } + /* do not invalidate this block address */ f2fs_update_data_blkaddr(&dn, NULL_ADDR); - do_replace = true; + *do_replace = 1; } - f2fs_put_dnode(&dn); } + f2fs_put_dnode(&dn); +next: + len -= done; + off += done; + if (len) + goto next_dnode; + return 0; +} - if (new_addr == NULL_ADDR) - return full ? truncate_hole(inode, dst, dst + 1) : 0; +static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, + int *do_replace, pgoff_t off, int len) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + int ret, i; - if (do_replace) { - struct page *ipage; - struct node_info ni; + for (i = 0; i < len; i++, do_replace++, blkaddr++) { + if (*do_replace == 0) + continue; - if (test_opt(sbi, LFS)) { - ret = -ENOTSUPP; - goto err_out; + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); + if (ret) { + dec_valid_block_count(sbi, inode, 1); + invalidate_blocks(sbi, *blkaddr); + } else { + f2fs_update_data_blkaddr(&dn, *blkaddr); } - - ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - ret = PTR_ERR(ipage); - goto err_out; - } - - set_new_dnode(&dn, inode, ipage, NULL, 0); - ret = f2fs_reserve_block(&dn, dst); - if (ret) - goto err_out; - - truncate_data_blocks_range(&dn, 1); - - get_node_info(sbi, dn.nid, &ni); - f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr, - ni.version, true, false); f2fs_put_dnode(&dn); - } else { - struct page *psrc, *pdst; - - psrc = get_lock_data_page(inode, src, true); - if (IS_ERR(psrc)) - return PTR_ERR(psrc); - pdst = get_new_data_page(inode, NULL, dst, true); - if (IS_ERR(pdst)) { - f2fs_put_page(psrc, 1); - return PTR_ERR(pdst); - } - f2fs_copy_page(psrc, pdst); - set_page_dirty(pdst); - f2fs_put_page(pdst, 1); - f2fs_put_page(psrc, 1); - - return truncate_hole(inode, src, src + 1); } return 0; +} -err_out: - if (!get_dnode_of_data(&dn, src, LOOKUP_NODE)) { - f2fs_update_data_blkaddr(&dn, new_addr); - f2fs_put_dnode(&dn); +static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, + block_t *blkaddr, int *do_replace, + pgoff_t src, pgoff_t dst, pgoff_t len, bool full) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode); + pgoff_t i = 0; + int ret; + + while (i < len) { + if (blkaddr[i] == NULL_ADDR && !full) { + i++; + continue; + } + + if (do_replace[i] || blkaddr[i] == NULL_ADDR) { + struct dnode_of_data dn; + struct node_info ni; + size_t new_size; + pgoff_t ilen; + + set_new_dnode(&dn, dst_inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, dst + i, ALLOC_NODE); + if (ret) + return ret; + + get_node_info(sbi, dn.nid, &ni); + ilen = min((pgoff_t) + ADDRS_PER_PAGE(dn.node_page, dst_inode) - + dn.ofs_in_node, len - i); + do { + dn.data_blkaddr = datablock_addr(dn.node_page, + dn.ofs_in_node); + truncate_data_blocks_range(&dn, 1); + + if (do_replace[i]) { + f2fs_i_blocks_write(src_inode, + 1, false); + f2fs_i_blocks_write(dst_inode, + 1, true); + f2fs_replace_block(sbi, &dn, dn.data_blkaddr, + blkaddr[i], ni.version, true, false); + + do_replace[i] = 0; + } + dn.ofs_in_node++; + i++; + new_size = (dst + i) << PAGE_SHIFT; + if (dst_inode->i_size < new_size) + f2fs_i_size_write(dst_inode, new_size); + } while ((do_replace[i] || blkaddr[i] == NULL_ADDR) && --ilen); + + f2fs_put_dnode(&dn); + } else { + struct page *psrc, *pdst; + + psrc = get_lock_data_page(src_inode, src + i, true); + if (IS_ERR(psrc)) + return PTR_ERR(psrc); + pdst = get_new_data_page(dst_inode, NULL, dst + i, + true); + if (IS_ERR(pdst)) { + f2fs_put_page(psrc, 1); + return PTR_ERR(pdst); + } + f2fs_copy_page(psrc, pdst); + set_page_dirty(pdst); + f2fs_put_page(pdst, 1); + f2fs_put_page(psrc, 1); + + ret = truncate_hole(src_inode, src + i, src + i + 1); + if (ret) + return ret; + i++; + } } + return 0; +} + +static int __exchange_data_block(struct inode *src_inode, + struct inode *dst_inode, pgoff_t src, pgoff_t dst, + int len, bool full) +{ + block_t *src_blkaddr; + int *do_replace; + int ret; + + src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * len, GFP_KERNEL); + if (!src_blkaddr) + return -ENOMEM; + + do_replace = f2fs_kvzalloc(sizeof(int) * len, GFP_KERNEL); + if (!do_replace) { + kvfree(src_blkaddr); + return -ENOMEM; + } + + ret = __read_out_blkaddrs(src_inode, src_blkaddr, do_replace, src, len); + if (ret) + goto roll_back; + + ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr, + do_replace, src, dst, len, full); + if (ret) + goto roll_back; + + kvfree(src_blkaddr); + kvfree(do_replace); + return 0; + +roll_back: + __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, len); + kvfree(src_blkaddr); + kvfree(do_replace); return ret; } @@ -936,16 +1040,12 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; - int ret = 0; + int ret; - for (; end < nrpages; start++, end++) { - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); - ret = __exchange_data_block(inode, end, start, true); - f2fs_unlock_op(sbi); - if (ret) - break; - } + f2fs_balance_fs(sbi, true); + f2fs_lock_op(sbi); + ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); + f2fs_unlock_op(sbi); return ret; } @@ -1134,7 +1234,7 @@ out: static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - pgoff_t pg_start, pg_end, delta, nrpages, idx; + pgoff_t nr, pg_start, pg_end, delta, idx; loff_t new_size; int ret = 0; @@ -1169,14 +1269,18 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; delta = pg_end - pg_start; - nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + + while (!ret && idx > pg_start) { + nr = idx - pg_start; + if (nr > delta) + nr = delta; + idx -= nr; - for (idx = nrpages - 1; idx >= pg_start && idx != -1; idx--) { f2fs_lock_op(sbi); - ret = __exchange_data_block(inode, idx, idx + delta, false); + ret = __exchange_data_block(inode, inode, idx, + idx + delta, nr, false); f2fs_unlock_op(sbi); - if (ret) - break; } /* write out all moved pages, if possible */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b841c439adb7..d78f61d647c0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -649,6 +649,7 @@ release_out: if (err == -ENOENT) { dn->cur_level = i; dn->max_level = level; + dn->ofs_in_node = offset[level]; } return err; } From 5f281fab9b9a30073616c5e25da2111dec2fe482 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 Jul 2016 11:07:52 -0700 Subject: [PATCH 57/68] f2fs: disable extent_cache for fcollapse/finsert inodes This reduces the elapsed time to do xfstests/generic/017. Before: 458 s After: 390 s Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 13 +++++++++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 5 +++++ 3 files changed, 19 insertions(+) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 5b4b6d426ebb..2b06d4fcd954 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -631,6 +631,19 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode) return node_cnt; } +void f2fs_drop_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et = F2FS_I(inode)->extent_tree; + + set_inode_flag(inode, FI_NO_EXTENT); + + write_lock(&et->lock); + __free_extent_tree(sbi, et); + __drop_largest_extent(inode, 0, UINT_MAX); + write_unlock(&et->lock); +} + void f2fs_destroy_extent_tree(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0e46dd0a09d3..b4a46b6823dc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2297,6 +2297,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *); */ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); +void f2fs_drop_extent_tree(struct inode *); unsigned int f2fs_destroy_extent_node(struct inode *); void f2fs_destroy_extent_tree(struct inode *); bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b1dc972407e5..72e52cd15b60 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1044,6 +1044,9 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); + + f2fs_drop_extent_tree(inode); + ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); f2fs_unlock_op(sbi); return ret; @@ -1278,6 +1281,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) idx -= nr; f2fs_lock_op(sbi); + f2fs_drop_extent_tree(inode); + ret = __exchange_data_block(inode, inode, idx, idx + delta, nr, false); f2fs_unlock_op(sbi); From 44a83499dda714d9262a9bf4fdac8c077893c9e6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Jul 2016 18:23:35 -0700 Subject: [PATCH 58/68] f2fs: add maximum prefree segments In 1TB storage, we need to admit 22841 prefree segments, which can consume too much segments. This patch sets 8GB in max. prefree segments in that case. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +++ fs/f2fs/segment.h | 1 + 2 files changed, 4 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 08f6c0be20cf..e87aa058f57a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2409,6 +2409,9 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); sm_info->rec_prefree_segments = sm_info->main_segments * DEF_RECLAIM_PREFREE_SEGMENTS / 100; + if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) + sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; + if (!test_opt(sbi, LFS)) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 57d450fb4643..b33f73ec60a4 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -16,6 +16,7 @@ #define NULL_SECNO ((unsigned int)(~0)) #define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */ +#define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */ /* L: Logical segment # in volume, R: Relative segment # in main area */ #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) From 82e0a5aa5ddf794b3e1b21fcd091228736871882 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 13 Jul 2016 09:18:29 +0800 Subject: [PATCH 59/68] f2fs: fix to avoid data update racing between GC and DIO Datas in file can be operated by GC and DIO simultaneously, so we will face race case as below: For write case: Thread A Thread B - generic_file_direct_write - invalidate_inode_pages2_range - f2fs_direct_IO - do_blockdev_direct_IO - do_direct_IO - get_more_blocks - f2fs_gc - do_garbage_collect - gc_data_segment - move_data_page - do_write_data_page migrate data block to new block address - dio_bio_submit update user data to old block address For read case: Thread A Thread B - generic_file_direct_write - invalidate_inode_pages2_range - f2fs_direct_IO - do_blockdev_direct_IO - do_direct_IO - get_more_blocks - f2fs_balance_fs - f2fs_gc - do_garbage_collect - gc_data_segment - move_data_page - do_write_data_page migrate data block to new block address - write_checkpoint - do_checkpoint - clear_prefree_segments - f2fs_issue_discard discard old block adress - dio_bio_submit update user buffer from obsolete block address In order to fix this, for one file, we should let DIO and GC getting exclusion against with each other. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++++- fs/f2fs/f2fs.h | 1 + fs/f2fs/gc.c | 20 ++++++++++++++++++++ fs/f2fs/super.c | 2 ++ 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 650099597dd2..adfe47b21991 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1716,6 +1716,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; + int rw = iov_iter_rw(iter); int err; err = check_direct_IO(inode, iter, offset); @@ -1729,8 +1730,11 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); + down_read(&F2FS_I(inode)->dio_rwsem[rw]); err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); - if (iov_iter_rw(iter) == WRITE) { + up_read(&F2FS_I(inode)->dio_rwsem[rw]); + + if (rw == WRITE) { if (err > 0) set_inode_flag(inode, FI_UPDATE_WRITE); else if (err < 0) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b4a46b6823dc..211183c4e5c3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -454,6 +454,7 @@ struct f2fs_inode_info { struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ struct extent_tree *extent_tree; /* cached extent_tree entry */ + struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ }; static inline void get_extent_info(struct extent_info *ext, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c61213785914..5c8acf754513 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -755,12 +755,32 @@ next_step: /* phase 3 */ inode = find_gc_inode(gc_list, dni.ino); if (inode) { + struct f2fs_inode_info *fi = F2FS_I(inode); + bool locked = false; + + if (S_ISREG(inode->i_mode)) { + if (!down_write_trylock(&fi->dio_rwsem[READ])) + continue; + if (!down_write_trylock( + &fi->dio_rwsem[WRITE])) { + up_write(&fi->dio_rwsem[READ]); + continue; + } + locked = true; + } + start_bidx = start_bidx_of_node(nofs, inode) + ofs_in_node; if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) move_encrypted_block(inode, start_bidx); else move_data_page(inode, start_bidx, gc_type); + + if (locked) { + up_write(&fi->dio_rwsem[WRITE]); + up_write(&fi->dio_rwsem[READ]); + } + stat_inc_data_blk_count(sbi, 1, gc_type); } } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 451dfb4041e8..b97c065cbe74 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -579,6 +579,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&fi->gdirty_list); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); + init_rwsem(&fi->dio_rwsem[READ]); + init_rwsem(&fi->dio_rwsem[WRITE]); /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; From 9dfa1baff76d08843aaf5e3c78f6da6950957702 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Jul 2016 19:33:19 -0700 Subject: [PATCH 60/68] f2fs: use blk_plug in all the possible paths This patch reverts 19a5f5e2ef37 (f2fs: drop any block plugging), and adds blk_plug in write paths additionally. The main reason is that blk_start_plug can be used to wake up from low-power mode before submitting further bios. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 +++++++ fs/f2fs/data.c | 3 +++ fs/f2fs/file.c | 6 +++++- fs/f2fs/gc.c | 5 +++++ fs/f2fs/node.c | 3 +++ fs/f2fs/segment.c | 7 ++++++- 6 files changed, 29 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 8ea895389ae4..be1c54b62388 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -265,6 +265,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + struct blk_plug plug; long diff, written; /* collect a number of dirty meta pages and write together */ @@ -277,7 +278,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping, /* if mounting is failed, skip writing node pages */ mutex_lock(&sbi->cp_mutex); diff = nr_pages_to_write(sbi, META, wbc); + blk_start_plug(&plug); written = sync_meta_pages(sbi, META, wbc->nr_to_write); + blk_finish_plug(&plug); mutex_unlock(&sbi->cp_mutex); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -899,8 +902,11 @@ static int block_operations(struct f2fs_sb_info *sbi) .nr_to_write = LONG_MAX, .for_reclaim = 0, }; + struct blk_plug plug; int err = 0; + blk_start_plug(&plug); + retry_flush_dents: f2fs_lock_all(sbi); /* write all the dirty dentry pages */ @@ -937,6 +943,7 @@ retry_flush_nodes: goto retry_flush_nodes; } out: + blk_finish_plug(&plug); return err; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index adfe47b21991..87a458fb8afe 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1438,6 +1438,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct blk_plug plug; int ret; /* deal with chardevs and other special file */ @@ -1463,7 +1464,9 @@ static int f2fs_write_data_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, DATA); + blk_start_plug(&plug); ret = f2fs_write_cache_pages(mapping, wbc); + blk_finish_plug(&plug); /* * if some pages were truncated, we cannot guarantee its mapping->host * to detect pending bios. diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 72e52cd15b60..47f1f5e36d1c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2102,6 +2102,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + struct blk_plug plug; ssize_t ret; if (f2fs_encrypted_inode(inode) && @@ -2113,8 +2114,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = generic_write_checks(iocb, from); if (ret > 0) { ret = f2fs_preallocate_blocks(iocb, from); - if (!ret) + if (!ret) { + blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); + blk_finish_plug(&plug); + } } inode_unlock(inode); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5c8acf754513..de6c41c32c62 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -808,6 +808,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, { struct page *sum_page; struct f2fs_summary_block *sum; + struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + sbi->segs_per_sec; int seg_freed = 0; @@ -825,6 +826,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unlock_page(sum_page); } + blk_start_plug(&plug); + for (segno = start_segno; segno < end_segno; segno++) { if (get_valid_blocks(sbi, segno, 1) == 0) @@ -862,6 +865,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, f2fs_submit_merged_bio(sbi, (type == SUM_TYPE_NODE) ? NODE : DATA, WRITE); + blk_finish_plug(&plug); + if (gc_type == FG_GC) { while (start_segno < end_segno) if (get_valid_blocks(sbi, start_segno++, 1) == 0) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d78f61d647c0..79a93c6e632f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1618,6 +1618,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + struct blk_plug plug; long diff; /* balancing f2fs's metadata in background */ @@ -1631,7 +1632,9 @@ static int f2fs_write_node_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, NODE, wbc); wbc->sync_mode = WB_SYNC_NONE; + blk_start_plug(&plug); sync_node_pages(sbi, wbc); + blk_finish_plug(&plug); wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); return 0; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e87aa058f57a..d45e6bbf8493 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -381,8 +381,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) excess_prefree_segs(sbi) || excess_dirty_nats(sbi) || (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) { - if (test_opt(sbi, DATA_FLUSH)) + if (test_opt(sbi, DATA_FLUSH)) { + struct blk_plug plug; + + blk_start_plug(&plug); sync_dirty_inodes(sbi, FILE_INODE); + blk_finish_plug(&plug); + } f2fs_sync_fs(sbi->sb, true); stat_inc_bg_cp_count(sbi->stat_info); } From dcf25fe8fcf4e68057d02e453e7ccf93fa1d1071 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 15 Jul 2016 19:25:47 +0800 Subject: [PATCH 61/68] f2fs: reset default idle interval value The default value of idle interval is 2 mins, but for most time when screen shutdown, there are still operations during the 2 mins interval, and gc's sleep time is about 30 secs to 60 secs, so there is almost no chance for GC thread to do garbage collecting. Set default value of idle interval value from 2 mins to 5 secs for fixing. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 211183c4e5c3..521cb92285fa 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -164,7 +164,7 @@ enum { #define BATCHED_TRIM_BLOCKS(sbi) \ (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) #define DEF_CP_INTERVAL 60 /* 60 secs */ -#define DEF_IDLE_INTERVAL 120 /* 2 mins */ +#define DEF_IDLE_INTERVAL 5 /* 5 secs */ struct cp_control { int reason; From 363cad7f7e586b385c20e9925b4923683d46deb6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 16 Jul 2016 21:59:22 -0700 Subject: [PATCH 62/68] f2fs: avoid memory allocation failure due to a long length We need to avoid ENOMEM due to unexpected long length. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 48 +++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 47f1f5e36d1c..9f9cb6489997 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1000,33 +1000,43 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, static int __exchange_data_block(struct inode *src_inode, struct inode *dst_inode, pgoff_t src, pgoff_t dst, - int len, bool full) + pgoff_t len, bool full) { block_t *src_blkaddr; int *do_replace; + pgoff_t olen; int ret; - src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * len, GFP_KERNEL); - if (!src_blkaddr) - return -ENOMEM; + while (len) { + olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len); + + src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * olen, GFP_KERNEL); + if (!src_blkaddr) + return -ENOMEM; + + do_replace = f2fs_kvzalloc(sizeof(int) * olen, GFP_KERNEL); + if (!do_replace) { + kvfree(src_blkaddr); + return -ENOMEM; + } + + ret = __read_out_blkaddrs(src_inode, src_blkaddr, + do_replace, src, olen); + if (ret) + goto roll_back; + + ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr, + do_replace, src, dst, olen, full); + if (ret) + goto roll_back; + + src += olen; + dst += olen; + len -= olen; - do_replace = f2fs_kvzalloc(sizeof(int) * len, GFP_KERNEL); - if (!do_replace) { kvfree(src_blkaddr); - return -ENOMEM; + kvfree(do_replace); } - - ret = __read_out_blkaddrs(src_inode, src_blkaddr, do_replace, src, len); - if (ret) - goto roll_back; - - ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr, - do_replace, src, dst, len, full); - if (ret) - goto roll_back; - - kvfree(src_blkaddr); - kvfree(do_replace); return 0; roll_back: From 91246c21b85985c48b1e1f5603e0d81161eb76a4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 19 Jul 2016 08:27:47 +0800 Subject: [PATCH 63/68] f2fs: fix to report error number of f2fs_find_entry This patch fixes to report the right error number of f2fs_find_entry to its caller. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 10 +++++----- fs/f2fs/f2fs.h | 2 +- fs/f2fs/namei.c | 46 +++++++++++++++++++++++++++++++++++----------- fs/f2fs/recovery.c | 7 +++++-- 4 files changed, 46 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index db4022f9c5b1..a485f68a76b1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -272,17 +272,17 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) return f2fs_find_entry(dir, &dotdot, p); } -ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) +ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr, + struct page **page) { ino_t res = 0; struct f2fs_dir_entry *de; - struct page *page; - de = f2fs_find_entry(dir, qstr, &page); + de = f2fs_find_entry(dir, qstr, page); if (de) { res = le32_to_cpu(de->ino); - f2fs_dentry_kunmap(dir, page); - f2fs_put_page(page, 0); + f2fs_dentry_kunmap(dir, *page); + f2fs_put_page(*page, 0); } return res; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 521cb92285fa..c7378540ba37 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1898,7 +1898,7 @@ void f2fs_drop_nlink(struct inode *, struct inode *); struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, struct page **); struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); -ino_t f2fs_inode_by_name(struct inode *, struct qstr *); +ino_t f2fs_inode_by_name(struct inode *, struct qstr *, struct page **); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, struct inode *, const struct qstr *); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c9ba6d7a6a06..73fa356f8fbb 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -202,9 +202,13 @@ out: struct dentry *f2fs_get_parent(struct dentry *child) { struct qstr dotdot = QSTR_INIT("..", 2); - unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot); - if (!ino) + struct page *page; + unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot, &page); + if (!ino) { + if (IS_ERR(page)) + return ERR_CAST(page); return ERR_PTR(-ENOENT); + } return d_obtain_alias(f2fs_iget(child->d_sb, ino)); } @@ -338,8 +342,11 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) trace_f2fs_unlink_enter(dir, dentry); de = f2fs_find_entry(dir, &dentry->d_name, &page); - if (!de) + if (!de) { + if (IS_ERR(page)) + err = PTR_ERR(page); goto fail; + } f2fs_balance_fs(sbi, true); @@ -658,13 +665,17 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); - if (!old_entry) + if (!old_entry) { + if (IS_ERR(old_page)) + err = PTR_ERR(old_page); goto out; + } if (S_ISDIR(old_inode->i_mode)) { old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); if (!old_dir_entry) { - err = PTR_ERR(old_dir_page); + if (IS_ERR(old_dir_page)) + err = PTR_ERR(old_dir_page); goto out_old; } } @@ -684,8 +695,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, err = -ENOENT; new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); - if (!new_entry) + if (!new_entry) { + if (IS_ERR(new_page)) + err = PTR_ERR(new_page); goto out_whiteout; + } f2fs_balance_fs(sbi, true); @@ -743,7 +757,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { - err = -EIO; + err = -ENOENT; + if (IS_ERR(old_page)) + err = PTR_ERR(old_page); f2fs_unlock_op(sbi); goto out_whiteout; } @@ -829,12 +845,18 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, return -EPERM; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); - if (!old_entry) + if (!old_entry) { + if (IS_ERR(old_page)) + err = PTR_ERR(old_page); goto out; + } new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); - if (!new_entry) + if (!new_entry) { + if (IS_ERR(new_page)) + err = PTR_ERR(new_page); goto out_old; + } /* prepare for updating ".." directory entry info later */ if (old_dir != new_dir) { @@ -842,7 +864,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); if (!old_dir_entry) { - err = PTR_ERR(old_dir_page); + if (IS_ERR(old_dir_page)) + err = PTR_ERR(old_dir_page); goto out_new; } } @@ -851,7 +874,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, new_dir_entry = f2fs_parent_dir(new_inode, &new_dir_page); if (!new_dir_entry) { - err = PTR_ERR(new_dir_page); + if (IS_ERR(new_dir_page)) + err = PTR_ERR(new_dir_page); goto out_old_dir; } } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index a39d84ab66b2..5d4461f2c266 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -153,9 +153,12 @@ retry: f2fs_delete_entry(de, page, dir, einode); iput(einode); goto retry; + } else if (IS_ERR(page)) { + err = PTR_ERR(page); + } else { + err = __f2fs_add_link(dir, &name, inode, + inode->i_ino, inode->i_mode); } - err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode); - goto out; out_unmap_put: From 4dd6f977fc778e5a0da604e5f8cb2f36d163d27b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 Jul 2016 15:16:47 -0700 Subject: [PATCH 64/68] f2fs: support an ioctl to move a range of data blocks This patch implements moving a range of data blocks from source file to destination file. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 9 ++++ fs/f2fs/file.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c7378540ba37..7a57279b2c54 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -268,6 +268,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6) #define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) #define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8) +#define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ + struct f2fs_move_range) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -297,6 +299,13 @@ struct f2fs_defragment { u64 len; }; +struct f2fs_move_range { + u32 dst_fd; /* destination fd */ + u64 pos_in; /* start position in src_fd */ + u64 pos_out; /* start position in dst_fd */ + u64 len; /* size to move */ +}; + /* * For INODE and NODE manager */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9f9cb6489997..0e493f63ea41 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -2068,6 +2069,133 @@ out: return err; } +static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, size_t len) +{ + struct inode *src = file_inode(file_in); + struct inode *dst = file_inode(file_out); + struct f2fs_sb_info *sbi = F2FS_I_SB(src); + size_t olen = len, dst_max_i_size = 0; + size_t dst_osize; + int ret; + + if (file_in->f_path.mnt != file_out->f_path.mnt || + src->i_sb != dst->i_sb) + return -EXDEV; + + if (unlikely(f2fs_readonly(src->i_sb))) + return -EROFS; + + if (S_ISDIR(src->i_mode) || S_ISDIR(dst->i_mode)) + return -EISDIR; + + if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst)) + return -EOPNOTSUPP; + + inode_lock(src); + if (src != dst) + inode_lock(dst); + + ret = -EINVAL; + if (pos_in + len > src->i_size || pos_in + len < pos_in) + goto out_unlock; + if (len == 0) + olen = len = src->i_size - pos_in; + if (pos_in + len == src->i_size) + len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in; + if (len == 0) { + ret = 0; + goto out_unlock; + } + + dst_osize = dst->i_size; + if (pos_out + olen > dst->i_size) + dst_max_i_size = pos_out + olen; + + /* verify the end result is block aligned */ + if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) || + !IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) || + !IS_ALIGNED(pos_out, F2FS_BLKSIZE)) + goto out_unlock; + + ret = f2fs_convert_inline_inode(src); + if (ret) + goto out_unlock; + + ret = f2fs_convert_inline_inode(dst); + if (ret) + goto out_unlock; + + /* write out all dirty pages from offset */ + ret = filemap_write_and_wait_range(src->i_mapping, + pos_in, pos_in + len); + if (ret) + goto out_unlock; + + ret = filemap_write_and_wait_range(dst->i_mapping, + pos_out, pos_out + len); + if (ret) + goto out_unlock; + + f2fs_balance_fs(sbi, true); + f2fs_lock_op(sbi); + ret = __exchange_data_block(src, dst, pos_in, + pos_out, len >> F2FS_BLKSIZE_BITS, false); + + if (!ret) { + if (dst_max_i_size) + f2fs_i_size_write(dst, dst_max_i_size); + else if (dst_osize != dst->i_size) + f2fs_i_size_write(dst, dst_osize); + } + f2fs_unlock_op(sbi); +out_unlock: + if (src != dst) + inode_unlock(dst); + inode_unlock(src); + return ret; +} + +static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) +{ + struct f2fs_move_range range; + struct fd dst; + int err; + + if (!(filp->f_mode & FMODE_READ) || + !(filp->f_mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(&range, (struct f2fs_move_range __user *)arg, + sizeof(range))) + return -EFAULT; + + dst = fdget(range.dst_fd); + if (!dst.file) + return -EBADF; + + if (!(dst.file->f_mode & FMODE_WRITE)) { + err = -EBADF; + goto err_out; + } + + err = mnt_want_write_file(filp); + if (err) + goto err_out; + + err = f2fs_move_file_range(filp, range.pos_in, dst.file, + range.pos_out, range.len); + + mnt_drop_write_file(filp); + + if (copy_to_user((struct f2fs_move_range __user *)arg, + &range, sizeof(range))) + err = -EFAULT; +err_out: + fdput(dst); + return err; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { @@ -2103,6 +2231,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_write_checkpoint(filp, arg); case F2FS_IOC_DEFRAGMENT: return f2fs_ioc_defragment(filp, arg); + case F2FS_IOC_MOVE_RANGE: + return f2fs_ioc_move_range(filp, arg); default: return -ENOTTY; } @@ -2163,6 +2293,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_WRITE_CHECKPOINT: case F2FS_IOC_DEFRAGMENT: break; + case F2FS_IOC_MOVE_RANGE: + break; default: return -ENOIOCTLCMD; } From dd11a5df5219b4d3c4d3f38b9cae48c3518d3152 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 19 Jul 2016 19:20:11 -0700 Subject: [PATCH 65/68] f2fs: avoid data race when deciding checkpoin in f2fs_sync_file When fs utilization is almost full, f2fs_sync_file should do checkpoint if there is not enough space for roll-forward later. (i.e. space_for_roll_forward) So, currently we have no lock for sbi->alloc_valid_block_count, resulting in race condition. In rare case, we can get -ENOSPC when doing roll-forward which triggers if (is_valid_blkaddr(sbi, dest, META_POR)) { if (src == NULL_ADDR) { err = reserve_new_block(&dn); f2fs_bug_on(sbi, err); ... } ... } in do_recover_data. So, this patch avoids that situation in advance. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7a57279b2c54..30981094dff8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1147,24 +1147,33 @@ static inline void f2fs_i_blocks_write(struct inode *, blkcnt_t, bool); static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count) { + blkcnt_t diff; + #ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(FAULT_BLOCK)) return false; #endif + /* + * let's increase this in prior to actual block count change in order + * for f2fs_sync_file to avoid data races when deciding checkpoint. + */ + percpu_counter_add(&sbi->alloc_valid_block_count, (*count)); + spin_lock(&sbi->stat_lock); sbi->total_valid_block_count += (block_t)(*count); if (unlikely(sbi->total_valid_block_count > sbi->user_block_count)) { - *count -= sbi->total_valid_block_count - sbi->user_block_count; + diff = sbi->total_valid_block_count - sbi->user_block_count; + *count -= diff; sbi->total_valid_block_count = sbi->user_block_count; if (!*count) { spin_unlock(&sbi->stat_lock); + percpu_counter_sub(&sbi->alloc_valid_block_count, diff); return false; } } spin_unlock(&sbi->stat_lock); f2fs_i_blocks_write(inode, *count, true); - percpu_counter_add(&sbi->alloc_valid_block_count, (*count)); return true; } From 6f3ec9952c13f0adf632e89456df43946cec6525 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 19 Jul 2016 19:30:06 -0700 Subject: [PATCH 66/68] f2fs: handle error case with f2fs_bug_on It's enough to show BUG or WARN by f2fs_bug_on for error case. Then, we don't need to remain corrupted filesystem. Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 5d4461f2c266..9e652d5a659b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -482,6 +482,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, #endif /* We should not get -ENOSPC */ f2fs_bug_on(sbi, err); + if (err) + goto err; } /* Check the previous node page having this index */ From fe94793e555f650fab656649521fc38aaab4874e Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Fri, 22 Jul 2016 19:08:31 +0800 Subject: [PATCH 67/68] f2fs: get victim segment again after new cp Previous selected segment may become free after write_checkpoint, if we do garbage collect on this segment, and then new_curseg happen to reuse it, it may cause f2fs_bug_on as below. panic+0x154/0x29c do_garbage_collect+0x15c/0xaf4 f2fs_gc+0x2dc/0x444 f2fs_balance_fs.part.22+0xcc/0x14c f2fs_balance_fs+0x28/0x34 f2fs_map_blocks+0x5ec/0x790 f2fs_preallocate_blocks+0xe0/0x100 f2fs_file_write_iter+0x64/0x11c new_sync_write+0xac/0x11c vfs_write+0x144/0x1e4 SyS_write+0x60/0xc0 Here, maybe we check sit and ssa type during reset_curseg. So, we check segment is stale or not, and select a new victim to avoid this. Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index de6c41c32c62..06cfb94cc3db 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -908,10 +908,13 @@ gc_more: * enough free sections, we should flush dent/node blocks and do * garbage collections. */ - if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi)) + if (__get_victim(sbi, &segno, gc_type) || + prefree_segments(sbi)) { write_checkpoint(sbi, &cpc); - else if (has_not_enough_free_secs(sbi, 0)) + segno = NULL_SEGNO; + } else if (has_not_enough_free_secs(sbi, 0)) { write_checkpoint(sbi, &cpc); + } } if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type)) From 5302fb000def84100740a84d7f176c0e167b2141 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 22 Jul 2016 15:25:47 -0700 Subject: [PATCH 68/68] f2fs: clean up coding style and redundancy This patch includes minor clean-ups. Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.h | 2 +- fs/f2fs/data.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 997ca8edb6cb..b2334d11dae8 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -37,7 +37,7 @@ struct f2fs_acl_header { #ifdef CONFIG_F2FS_FS_POSIX_ACL extern struct posix_acl *f2fs_get_acl(struct inode *, int); -extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern int f2fs_set_acl(struct inode *, struct posix_acl *, int); extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, struct page *); #else diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 87a458fb8afe..614154f9bb35 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1731,7 +1731,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (test_opt(F2FS_I_SB(inode), LFS)) return 0; - trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); + trace_f2fs_direct_IO_enter(inode, offset, count, rw); down_read(&F2FS_I(inode)->dio_rwsem[rw]); err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); @@ -1744,7 +1744,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) f2fs_write_failed(mapping, offset + count); } - trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err); + trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); return err; }