diff --git a/block/blk.h b/block/blk.h index da722eb786df..c43926d3d74d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -72,8 +72,6 @@ void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); bool __blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes); -int blk_queue_enter(struct request_queue *q, gfp_t gfp); -void blk_queue_exit(struct request_queue *q); void blk_freeze_queue(struct request_queue *q); static inline void blk_queue_enter_live(struct request_queue *q) diff --git a/fs/Kconfig b/fs/Kconfig index da3f32f1a4e4..6ce72d8d1ee1 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -46,6 +46,12 @@ config FS_DAX or if unsure, say N. Saying Y will increase the size of the kernel by about 5kB. +config FS_DAX_PMD + bool + default FS_DAX + depends on FS_DAX + depends on BROKEN + endif # BLOCK # Posix ACL utility routines diff --git a/fs/block_dev.c b/fs/block_dev.c index bb0dfb1c7af1..c25639e907bd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -390,9 +390,17 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, struct page *page) { const struct block_device_operations *ops = bdev->bd_disk->fops; + int result = -EOPNOTSUPP; + if (!ops->rw_page || bdev_get_integrity(bdev)) - return -EOPNOTSUPP; - return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + return result; + + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + blk_queue_exit(bdev->bd_queue); + return result; } EXPORT_SYMBOL_GPL(bdev_read_page); @@ -421,14 +429,20 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, int result; int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE; const struct block_device_operations *ops = bdev->bd_disk->fops; + if (!ops->rw_page || bdev_get_integrity(bdev)) return -EOPNOTSUPP; + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + set_page_writeback(page); result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw); if (result) end_page_writeback(page); else unlock_page(page); + blk_queue_exit(bdev->bd_queue); return result; } EXPORT_SYMBOL_GPL(bdev_write_page); diff --git a/fs/dax.c b/fs/dax.c index d1e5cb7311a1..43671b68220e 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -541,6 +541,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, unsigned long pfn; int result = 0; + /* dax pmd mappings are broken wrt gup and fork */ + if (!IS_ENABLED(CONFIG_FS_DAX_PMD)) + return VM_FAULT_FALLBACK; + /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) return VM_FAULT_FALLBACK; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 3a71cea68420..748d35afc902 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -569,6 +569,8 @@ static int parse_options(char *options, struct super_block *sb) /* Fall through */ case Opt_dax: #ifdef CONFIG_FS_DAX + ext2_msg(sb, KERN_WARNING, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); set_opt(sbi->s_mount_opt, DAX); #else ext2_msg(sb, KERN_INFO, "dax option not supported"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 753f4e68b820..c9ab67da6e5a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1664,8 +1664,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, } sbi->s_jquota_fmt = m->mount_opt; #endif -#ifndef CONFIG_FS_DAX } else if (token == Opt_dax) { +#ifdef CONFIG_FS_DAX + ext4_msg(sb, KERN_WARNING, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + sbi->s_mount_opt |= m->mount_opt; +#else ext4_msg(sb, KERN_INFO, "dax option not supported"); return -1; #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3fe27f8d91f0..c0d2b7927c1f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -794,6 +794,8 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); +extern int blk_queue_enter(struct request_queue *q, gfp_t gfp); +extern void blk_queue_exit(struct request_queue *q); extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); diff --git a/mm/memory.c b/mm/memory.c index deb679c31f2a..c387430f06c3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3015,9 +3015,9 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, } else { /* * The fault handler has no page to lock, so it holds - * i_mmap_lock for write to protect against truncate. + * i_mmap_lock for read to protect against truncate. */ - i_mmap_unlock_write(vma->vm_file->f_mapping); + i_mmap_unlock_read(vma->vm_file->f_mapping); } goto uncharge_out; } @@ -3031,9 +3031,9 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, } else { /* * The fault handler has no page to lock, so it holds - * i_mmap_lock for write to protect against truncate. + * i_mmap_lock for read to protect against truncate. */ - i_mmap_unlock_write(vma->vm_file->f_mapping); + i_mmap_unlock_read(vma->vm_file->f_mapping); } return ret; uncharge_out: