Btrfs: Use async helpers to deal with pages that have been improperly dirtied

Higher layers sometimes call set_page_dirty without asking the filesystem
to help.  This causes many problems for the data=ordered and cow code.
This commit detects pages that haven't been properly setup for IO and
kicks off an async helper to deal with them.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Chris Mason 2008-07-17 12:53:51 -04:00
parent e6dcd2dc9c
commit 247e743cbe
6 changed files with 106 additions and 9 deletions

View file

@ -546,6 +546,12 @@ struct btrfs_fs_info {
struct btrfs_workers endio_workers; struct btrfs_workers endio_workers;
struct btrfs_workers endio_write_workers; struct btrfs_workers endio_write_workers;
struct btrfs_workers submit_workers; struct btrfs_workers submit_workers;
/*
* fixup workers take dirty pages that didn't properly go through
* the cow mechanism and make them safe to write. It happens
* for the sys_munmap function call path
*/
struct btrfs_workers fixup_workers;
struct task_struct *transaction_kthread; struct task_struct *transaction_kthread;
struct task_struct *cleaner_kthread; struct task_struct *cleaner_kthread;
int thread_pool_size; int thread_pool_size;

View file

@ -1329,11 +1329,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
*/ */
btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size);
btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size);
btrfs_init_workers(&fs_info->fixup_workers, 1);
btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
btrfs_init_workers(&fs_info->endio_write_workers, btrfs_init_workers(&fs_info->endio_write_workers,
fs_info->thread_pool_size); fs_info->thread_pool_size);
btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->workers, 1);
btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1);
btrfs_start_workers(&fs_info->fixup_workers, 1);
btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
btrfs_start_workers(&fs_info->endio_write_workers, btrfs_start_workers(&fs_info->endio_write_workers,
fs_info->thread_pool_size); fs_info->thread_pool_size);
@ -1454,6 +1456,7 @@ fail_tree_root:
fail_sys_array: fail_sys_array:
fail_sb_buffer: fail_sb_buffer:
extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
btrfs_stop_workers(&fs_info->fixup_workers);
btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->workers);
btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_workers);
btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->endio_write_workers);
@ -1710,6 +1713,7 @@ int close_ctree(struct btrfs_root *root)
truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
btrfs_stop_workers(&fs_info->fixup_workers);
btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->workers);
btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_workers);
btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->endio_write_workers);

View file

@ -2050,6 +2050,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
lock_extent(tree, start, page_end, GFP_NOFS); lock_extent(tree, start, page_end, GFP_NOFS);
unlock_start = start; unlock_start = start;
if (tree->ops && tree->ops->writepage_start_hook) {
ret = tree->ops->writepage_start_hook(page, start, page_end);
if (ret == -EAGAIN) {
unlock_extent(tree, start, page_end, GFP_NOFS);
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
}
}
end = page_end; end = page_end;
if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
printk("found delalloc bits after lock_extent\n"); printk("found delalloc bits after lock_extent\n");

View file

@ -30,6 +30,7 @@ typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
struct bio *bio, int mirror_num); struct bio *bio, int mirror_num);
struct extent_io_ops { struct extent_io_ops {
int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
int (*writepage_io_hook)(struct page *page, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
extent_submit_bio_hook_t *submit_bio_hook; extent_submit_bio_hook_t *submit_bio_hook;
int (*merge_bio_hook)(struct page *page, unsigned long offset, int (*merge_bio_hook)(struct page *page, unsigned long offset,

View file

@ -313,6 +313,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
for (i = 0; i < num_pages; i++) { for (i = 0; i < num_pages; i++) {
struct page *p = pages[i]; struct page *p = pages[i];
SetPageUptodate(p); SetPageUptodate(p);
ClearPageChecked(p);
set_page_dirty(p); set_page_dirty(p);
} }
} else { } else {

View file

@ -403,6 +403,87 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
return 0; return 0;
} }
struct btrfs_writepage_fixup {
struct page *page;
struct btrfs_work work;
};
/* see btrfs_writepage_start_hook for details on why this is required */
void btrfs_writepage_fixup_worker(struct btrfs_work *work)
{
struct btrfs_writepage_fixup *fixup;
struct btrfs_ordered_extent *ordered;
struct page *page;
struct inode *inode;
u64 page_start;
u64 page_end;
fixup = container_of(work, struct btrfs_writepage_fixup, work);
page = fixup->page;
lock_page(page);
if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
ClearPageChecked(page);
goto out_page;
}
inode = page->mapping->host;
page_start = page_offset(page);
page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
ordered = btrfs_lookup_ordered_extent(inode, page_start);
if (ordered)
goto out;
set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end,
GFP_NOFS);
ClearPageChecked(page);
out:
unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
out_page:
unlock_page(page);
page_cache_release(page);
}
/*
* There are a few paths in the higher layers of the kernel that directly
* set the page dirty bit without asking the filesystem if it is a
* good idea. This causes problems because we want to make sure COW
* properly happens and the data=ordered rules are followed.
*
* In our case any range that doesn't have the EXTENT_ORDERED bit set
* hasn't been properly setup for IO. We kick off an async process
* to fix it up. The async helper will wait for ordered extents, set
* the delalloc bit and make it safe to write the page.
*/
int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
{
struct inode *inode = page->mapping->host;
struct btrfs_writepage_fixup *fixup;
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
EXTENT_ORDERED, 0);
if (ret)
return 0;
if (PageChecked(page))
return -EAGAIN;
fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
if (!fixup)
return -EAGAIN;
printk("queueing worker to fixup page %lu %Lu\n", inode->i_ino, page_offset(page));
SetPageChecked(page);
page_cache_get(page);
fixup->work.func = btrfs_writepage_fixup_worker;
fixup->page = page;
btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
return -EAGAIN;
}
int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate) struct extent_state *state, int uptodate)
{ {
@ -1263,6 +1344,7 @@ again:
flush_dcache_page(page); flush_dcache_page(page);
kunmap(page); kunmap(page);
} }
ClearPageChecked(page);
set_page_dirty(page); set_page_dirty(page);
unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
@ -2658,6 +2740,7 @@ again:
flush_dcache_page(page); flush_dcache_page(page);
kunmap(page); kunmap(page);
} }
ClearPageChecked(page);
set_page_dirty(page); set_page_dirty(page);
unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
@ -3039,15 +3122,6 @@ out_fail:
static int btrfs_set_page_dirty(struct page *page) static int btrfs_set_page_dirty(struct page *page)
{ {
struct inode *inode = page->mapping->host;
u64 page_start = page_offset(page);
u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
EXTENT_DELALLOC, 0)) {
printk("inode %lu page %Lu not delalloc\n", inode->i_ino, page_offset(page));
WARN_ON(1);
}
return __set_page_dirty_nobuffers(page); return __set_page_dirty_nobuffers(page);
} }
@ -3098,6 +3172,7 @@ static struct extent_io_ops btrfs_extent_io_ops = {
.readpage_io_hook = btrfs_readpage_io_hook, .readpage_io_hook = btrfs_readpage_io_hook,
.readpage_end_io_hook = btrfs_readpage_end_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook,
.writepage_end_io_hook = btrfs_writepage_end_io_hook, .writepage_end_io_hook = btrfs_writepage_end_io_hook,
.writepage_start_hook = btrfs_writepage_start_hook,
.readpage_io_failed_hook = btrfs_io_failed_hook, .readpage_io_failed_hook = btrfs_io_failed_hook,
.set_bit_hook = btrfs_set_bit_hook, .set_bit_hook = btrfs_set_bit_hook,
.clear_bit_hook = btrfs_clear_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook,