diff --git a/fs/buffer.c b/fs/buffer.c index 0f9006714230..02ebb1f1d3b0 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2194,6 +2194,52 @@ int generic_commit_write(struct file *file, struct page *page, return 0; } +/* + * block_page_mkwrite() is not allowed to change the file size as it gets + * called from a page fault handler when a page is first dirtied. Hence we must + * be careful to check for EOF conditions here. We set the page up correctly + * for a written page which means we get ENOSPC checking when writing into + * holes and correct delalloc and unwritten extent mapping on filesystems that + * support these features. + * + * We are not allowed to take the i_mutex here so we have to play games to + * protect against truncate races as the page could now be beyond EOF. Because + * vmtruncate() writes the inode size before removing pages, once we have the + * page lock we can determine safely if the page is beyond EOF. If it is not + * beyond EOF, then the page is guaranteed safe against truncation until we + * unlock the page. + */ +int +block_page_mkwrite(struct vm_area_struct *vma, struct page *page, + get_block_t get_block) +{ + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + unsigned long end; + loff_t size; + int ret = -EINVAL; + + lock_page(page); + size = i_size_read(inode); + if ((page->mapping != inode->i_mapping) || + ((page->index << PAGE_CACHE_SHIFT) > size)) { + /* page got truncated out from underneath us */ + goto out_unlock; + } + + /* page is wholly or partially inside EOF */ + if (((page->index + 1) << PAGE_CACHE_SHIFT) > size) + end = size & ~PAGE_CACHE_MASK; + else + end = PAGE_CACHE_SIZE; + + ret = block_prepare_write(page, 0, end, get_block); + if (!ret) + ret = block_commit_write(page, 0, end); + +out_unlock: + unlock_page(page); + return ret; +} /* * nobh_prepare_write()'s prereads are special: the buffer_heads are freed @@ -2977,6 +3023,7 @@ EXPORT_SYMBOL(__brelse); EXPORT_SYMBOL(__wait_on_buffer); EXPORT_SYMBOL(block_commit_write); EXPORT_SYMBOL(block_prepare_write); +EXPORT_SYMBOL(block_page_mkwrite); EXPORT_SYMBOL(block_read_full_page); EXPORT_SYMBOL(block_sync_page); EXPORT_SYMBOL(block_truncate_page); diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 2d4be2f247b2..0d4001eafd16 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -413,6 +413,20 @@ xfs_file_open_exec( } #endif /* HAVE_FOP_OPEN_EXEC */ +/* + * mmap()d file has taken write protection fault and is being made + * writable. We can set the page state up correctly for a writable + * page, which means we can do correct delalloc accounting (ENOSPC + * checking!) and unwritten extent mapping. + */ +STATIC int +xfs_vm_page_mkwrite( + struct vm_area_struct *vma, + struct page *page) +{ + return block_page_mkwrite(vma, page, xfs_get_blocks); +} + const struct file_operations xfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -465,11 +479,13 @@ const struct file_operations xfs_dir_file_operations = { static struct vm_operations_struct xfs_file_vm_ops = { .fault = filemap_fault, + .page_mkwrite = xfs_vm_page_mkwrite, }; #ifdef CONFIG_XFS_DMAPI static struct vm_operations_struct xfs_dmapi_file_vm_ops = { .fault = xfs_vm_fault, + .page_mkwrite = xfs_vm_page_mkwrite, #ifdef HAVE_VMOP_MPROTECT .mprotect = xfs_vm_mprotect, #endif diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 79b522779aa4..1a5ad8cd97b0 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -589,7 +589,30 @@ xfs_setattr( code = xfs_igrow_start(ip, vap->va_size, credp); } xfs_iunlock(ip, XFS_ILOCK_EXCL); - vn_iowait(vp); /* wait for the completion of any pending DIOs */ + + /* + * We are going to log the inode size change in this + * transaction so any previous writes that are beyond the on + * disk EOF and the new EOF that have not been written out need + * to be written here. If we do not write the data out, we + * expose ourselves to the null files problem. + * + * Only flush from the on disk size to the smaller of the in + * memory file size or the new size as that's the range we + * really care about here and prevents waiting for other data + * not within the range we care about here. + */ + if (!code && + (ip->i_size != ip->i_d.di_size) && + (vap->va_size > ip->i_d.di_size)) { + code = bhv_vop_flush_pages(XFS_ITOV(ip), + ip->i_d.di_size, vap->va_size, + XFS_B_ASYNC, FI_NONE); + } + + /* wait for all I/O to complete */ + vn_iowait(vp); + if (!code) code = xfs_itruncate_data(ip, vap->va_size); if (code) { @@ -4434,9 +4457,12 @@ xfs_free_file_space( while (!error && !done) { /* - * allocate and setup the transaction + * allocate and setup the transaction. Allow this + * transaction to dip into the reserve blocks to ensure + * the freeing of the space succeeds at ENOSPC. */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); + tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 5c6e12853a9b..35cadad84b14 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -209,6 +209,8 @@ int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, int generic_cont_expand(struct inode *inode, loff_t size); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); +int block_page_mkwrite(struct vm_area_struct *vma, struct page *page, + get_block_t get_block); void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned);