Merge branch 'upstream-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'upstream-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (50 commits)
  ext4,jbd2: convert tracepoints to use major/minor numbers
  ext4: optimize orphan_list handling for ext4_setattr
  ext4: fix unbalanced mutex unlock in error path of ext4_li_request_new
  ext4: fix compile error in ext4_fallocate()
  ext4: move ext4_mb_{get,put}_buddy_cache_lock and make them static
  ext4: rename mark_bitmap_end() to ext4_mark_bitmap_end()
  ext4: move flush_completed_IO to fs/ext4/fsync.c and make it static
  ext4: rename {ext,idx}_pblock and inline small extent functions
  ext4: make various ext4 functions be static
  ext4: rename {exit,init}_ext4_*() to ext4_{exit,init}_*()
  ext4: fix kernel oops if the journal superblock has a non-zero j_errno
  ext4: update writeback_index based on last page scanned
  ext4: implement writeback livelock avoidance using page tagging
  ext4: tidy up a void argument in inode.c
  ext4: add batched_discard into ext4 feature list
  ext4: Add batched discard support for ext4
  fs: Add FITRIM ioctl
  ext4: Use return value from sb_issue_discard()
  ext4: Check return value of sb_getblk() and friends
  ext4: use bio layer instead of buffer layer in mpage_da_submit_io
  ...
This commit is contained in:
Linus Torvalds 2010-10-27 21:54:31 -07:00
commit 81280572ca
33 changed files with 2513 additions and 1134 deletions

View file

@ -353,6 +353,20 @@ noauto_da_alloc replacing existing files via patterns such as
system crashes before the delayed allocation
blocks are forced to disk.
noinit_itable Do not initialize any uninitialized inode table
blocks in the background. This feature may be
used by installation CD's so that the install
process can complete as quickly as possible; the
inode table initialization process would then be
deferred until the next time the file system
is unmounted.
init_itable=n The lazy itable init code will wait n times the
number of milliseconds it took to zero out the
previous block group's inode table. This
minimizes the impact on system performance
while the file system's inode table is being initialized.
discard Controls whether ext4 should issue discard/TRIM
nodiscard(*) commands to the underlying block device when
blocks are freed. This is useful for SSD devices

View file

@ -4,7 +4,7 @@
obj-$(CONFIG_EXT4_FS) += ext4.o
ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o

View file

@ -171,7 +171,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
* less than the blocksize * 8 ( which is the size
* of bitmap ), set rest of the block bitmap to 1
*/
mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
bh->b_data);
}
return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
}
@ -489,7 +490,7 @@ error_return:
* Check if filesystem has nblocks free & available for allocation.
* On success return 1, return 0 on failure.
*/
int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
{
s64 free_blocks, dirty_blocks, root_blocks;
struct percpu_counter *fbc = &sbi->s_freeblocks_counter;

View file

@ -29,16 +29,15 @@ struct ext4_system_zone {
static struct kmem_cache *ext4_system_zone_cachep;
int __init init_ext4_system_zone(void)
int __init ext4_init_system_zone(void)
{
ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
SLAB_RECLAIM_ACCOUNT);
ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
if (ext4_system_zone_cachep == NULL)
return -ENOMEM;
return 0;
}
void exit_ext4_system_zone(void)
void ext4_exit_system_zone(void)
{
kmem_cache_destroy(ext4_system_zone_cachep);
}

View file

@ -39,7 +39,7 @@ static int ext4_release_dir(struct inode *inode,
struct file *filp);
const struct file_operations ext4_dir_operations = {
.llseek = generic_file_llseek,
.llseek = ext4_llseek,
.read = generic_read_dir,
.readdir = ext4_readdir, /* we take BKL. needed?*/
.unlocked_ioctl = ext4_ioctl,

View file

@ -168,7 +168,20 @@ struct mpage_da_data {
int pages_written;
int retval;
};
#define EXT4_IO_UNWRITTEN 0x1
/*
* Flags for ext4_io_end->flags
*/
#define EXT4_IO_END_UNWRITTEN 0x0001
#define EXT4_IO_END_ERROR 0x0002
/*
 * Pairs a page with an integer count.
 * NOTE(review): p_count looks like a usage/reference count for p_page
 * -- confirm against the users of this struct.
 */
struct ext4_io_page {
struct page *p_page;
int p_count;
};
#define MAX_IO_PAGES 128
typedef struct ext4_io_end {
struct list_head list; /* per-file finished IO list */
struct inode *inode; /* file being written to */
@ -179,8 +192,18 @@ typedef struct ext4_io_end {
struct work_struct work; /* data work queue */
struct kiocb *iocb; /* iocb struct for AIO */
int result; /* error value for AIO */
int num_io_pages;
struct ext4_io_page *pages[MAX_IO_PAGES];
} ext4_io_end_t;
/*
 * State passed to ext4_io_submit() and ext4_bio_write_page().
 * NOTE(review): the per-field notes below are inferred from the field
 * names and types only -- confirm against the implementation.
 */
struct ext4_io_submit {
int io_op; /* presumably the I/O operation/flags used at submit time */
struct bio *io_bio; /* presumably the bio currently being built */
ext4_io_end_t *io_end; /* presumably the io_end tied to that bio */
struct ext4_io_page *io_page; /* presumably the page currently being added */
sector_t io_next_block; /* presumably the next block expected in the bio */
};
/*
* Special inodes numbers
*/
@ -205,6 +228,7 @@ typedef struct ext4_io_end {
#define EXT4_MIN_BLOCK_SIZE 1024
#define EXT4_MAX_BLOCK_SIZE 65536
#define EXT4_MIN_BLOCK_LOG_SIZE 10
#define EXT4_MAX_BLOCK_LOG_SIZE 16
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
#else
@ -889,6 +913,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
@ -1087,7 +1112,6 @@ struct ext4_sb_info {
struct completion s_kobj_unregister;
/* Journaling */
struct inode *s_journal_inode;
struct journal_s *s_journal;
struct list_head s_orphan;
struct mutex s_orphan_lock;
@ -1120,10 +1144,7 @@ struct ext4_sb_info {
/* for buddy allocator */
struct ext4_group_info ***s_group_info;
struct inode *s_buddy_cache;
long s_blocks_reserved;
spinlock_t s_reserve_lock;
spinlock_t s_md_lock;
tid_t s_last_transaction;
unsigned short *s_mb_offsets;
unsigned int *s_mb_maxs;
@ -1141,7 +1162,6 @@ struct ext4_sb_info {
unsigned long s_mb_last_start;
/* stats for buddy allocator */
spinlock_t s_mb_pa_lock;
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
atomic_t s_bal_success; /* we found long enough chunks */
atomic_t s_bal_allocated; /* in blocks */
@ -1172,6 +1192,11 @@ struct ext4_sb_info {
/* timer for periodic error stats printing */
struct timer_list s_err_report;
/* Lazy inode table initialization info */
struct ext4_li_request *s_li_request;
/* Wait multiplier for lazy initialization thread */
unsigned int s_li_wait_mult;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@ -1533,7 +1558,42 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
extern struct proc_dir_entry *ext4_proc_root;
/*
* Timeout and state flag for lazy initialization inode thread.
*/
#define EXT4_DEF_LI_WAIT_MULT 10
#define EXT4_DEF_LI_MAX_START_DELAY 5
#define EXT4_LAZYINIT_QUIT 0x0001
#define EXT4_LAZYINIT_RUNNING 0x0002
/*
 * Lazy inode table initialization info
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names and types only -- confirm against the lazy-init code.
 */
struct ext4_lazy_init {
unsigned long li_state; /* presumably EXT4_LAZYINIT_* flag bits */
wait_queue_head_t li_wait_daemon;
wait_queue_head_t li_wait_task;
struct timer_list li_timer;
struct task_struct *li_task; /* presumably the lazy-init thread */
struct list_head li_request_list; /* presumably a list of ext4_li_request */
struct mutex li_list_mtx; /* presumably protects li_request_list */
};
/*
 * One lazy-initialization request; struct ext4_sb_info points at one
 * through its s_li_request member.
 * NOTE(review): the per-field notes below are inferred from the field
 * names and types only -- confirm against the lazy-init code.
 */
struct ext4_li_request {
struct super_block *lr_super; /* presumably the file system this request is for */
struct ext4_sb_info *lr_sbi;
ext4_group_t lr_next_group; /* presumably the next block group to process */
struct list_head lr_request; /* list linkage for this request */
unsigned long lr_next_sched; /* presumably the next run time -- units unconfirmed */
unsigned long lr_timeout; /* presumably the delay between runs -- units unconfirmed */
};
/*
 * A kobject together with a completion.
 * NOTE(review): presumably the completion is signalled when the kobject
 * is released, so teardown can wait for it -- confirm in super.c.
 */
struct ext4_features {
struct kobject f_kobj;
struct completion f_kobj_unregister;
};
/*
* Function prototypes
@ -1561,7 +1621,6 @@ extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp);
extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count);
extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@ -1605,11 +1664,9 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
extern unsigned long ext4_count_dirs(struct super_block *);
extern void ext4_check_inodes_bitmap(struct super_block *);
extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
struct buffer_head *bh,
ext4_group_t group,
struct ext4_group_desc *desc);
extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
extern int ext4_init_inode_table(struct super_block *sb,
ext4_group_t group, int barrier);
/* mballoc.c */
extern long ext4_mb_stats;
@ -1620,16 +1677,15 @@ extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *);
extern int __init init_ext4_mballoc(void);
extern void exit_ext4_mballoc(void);
extern int __init ext4_init_mballoc(void);
extern void ext4_exit_mballoc(void);
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block,
unsigned long count, int flags);
extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
ext4_group_t, int);
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
/* inode.c */
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
@ -1657,13 +1713,11 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int flush_completed_IO(struct inode *inode);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
/* ioctl.c */
@ -1960,6 +2014,7 @@ extern const struct file_operations ext4_dir_operations;
/* file.c */
extern const struct inode_operations ext4_file_inode_operations;
extern const struct file_operations ext4_file_operations;
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
/* namei.c */
extern const struct inode_operations ext4_dir_inode_operations;
@ -1973,8 +2028,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
/* block_validity */
extern void ext4_release_system_zone(struct super_block *sb);
extern int ext4_setup_system_zone(struct super_block *sb);
extern int __init init_ext4_system_zone(void);
extern void exit_ext4_system_zone(void);
extern int __init ext4_init_system_zone(void);
extern void ext4_exit_system_zone(void);
extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
ext4_fsblk_t start_blk,
unsigned int count);
@ -2002,6 +2057,17 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
__u64 start_orig, __u64 start_donor,
__u64 len, __u64 *moved_len);
/* page-io.c */
extern int __init ext4_init_pageio(void);
extern void ext4_exit_pageio(void);
extern void ext4_free_io_end(ext4_io_end_t *io);
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
extern int ext4_end_io_nolock(ext4_io_end_t *io);
extern void ext4_io_submit(struct ext4_io_submit *io);
extern int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
int len,
struct writeback_control *wbc);
/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
enum ext4_state_bits {

View file

@ -225,11 +225,60 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
}
/*
 * ext4_ext_pblock:
 * combine low and high parts of physical block number into ext4_fsblk_t
 *
 * The low part comes from ex->ee_start_lo (via le32_to_cpu) and the high
 * part from ex->ee_start_hi (via le16_to_cpu); the combined value is
 * returned.
 */
static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
{
ext4_fsblk_t block;
block = le32_to_cpu(ex->ee_start_lo);
/*
 * The high part is moved up by 32 bits in two steps (31, then 1).
 * NOTE(review): presumably split so that no single shift equals the
 * width of a 32-bit type -- confirm.
 */
block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
return block;
}
/*
 * ext4_idx_pblock:
 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
 *
 * The low part comes from ix->ei_leaf_lo (via le32_to_cpu) and the high
 * part from ix->ei_leaf_hi (via le16_to_cpu); the combined value is
 * returned.
 */
static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix)
{
ext4_fsblk_t block;
block = le32_to_cpu(ix->ei_leaf_lo);
/* high part is shifted up by 32 bits in total, done as 31 then 1 */
block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
return block;
}
/*
 * ext4_ext_store_pblock:
 * stores a large physical block number into an extent struct,
 * breaking it into parts
 *
 * The low 32 bits of @pb are written to ex->ee_start_lo and the next
 * 16 bits (bits 32-47) to ex->ee_start_hi; any bits of @pb above that
 * are masked off and not stored.
 */
static inline void ext4_ext_store_pblock(struct ext4_extent *ex,
ext4_fsblk_t pb)
{
ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
/* shift down by 32 bits in two steps (31, then 1), keep 16 bits */
ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
0xffff);
}
/*
 * ext4_idx_store_pblock:
 * stores a large physical block number into an index struct,
 * breaking it into parts
 *
 * The low 32 bits of @pb are written to ix->ei_leaf_lo and the next
 * 16 bits (bits 32-47) to ix->ei_leaf_hi; any bits of @pb above that
 * are masked off and not stored.
 */
static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
ext4_fsblk_t pb)
{
ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
/* shift down by 32 bits in two steps (31, then 1), keep 16 bits */
ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
0xffff);
}
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
sector_t lblocks);
extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
int num,
@ -237,19 +286,9 @@ extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
extern int ext4_can_extents_be_merged(struct inode *inode,
struct ext4_extent *ex1,
struct ext4_extent *ex2);
extern int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *);
extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int);
extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t,
ext_prepare_callback, void *);
extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
struct ext4_ext_path *);
extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
ext4_lblk_t *, ext4_fsblk_t *);
extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
ext4_lblk_t *, ext4_fsblk_t *);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
#endif /* _EXT4_EXTENTS */

View file

@ -44,55 +44,6 @@
#include "ext4_jbd2.h"
#include "ext4_extents.h"
/*
* ext_pblock:
* combine low and high parts of physical block number into ext4_fsblk_t
*/
ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
{
ext4_fsblk_t block;
block = le32_to_cpu(ex->ee_start_lo);
block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
return block;
}
/*
* idx_pblock:
* combine low and high parts of a leaf physical block number into ext4_fsblk_t
*/
ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
{
ext4_fsblk_t block;
block = le32_to_cpu(ix->ei_leaf_lo);
block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
return block;
}
/*
* ext4_ext_store_pblock:
* stores a large physical block number into an extent struct,
* breaking it into parts
*/
void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
{
ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}
/*
* ext4_idx_store_pblock:
* stores a large physical block number into an index struct,
* breaking it into parts
*/
static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
{
ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}
static int ext4_ext_truncate_extend_restart(handle_t *handle,
struct inode *inode,
int needed)
@ -169,7 +120,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
/* try to predict block placement */
ex = path[depth].p_ext;
if (ex)
return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));
return (ext4_ext_pblock(ex) +
(block - le32_to_cpu(ex->ee_block)));
/* it looks like index is empty;
* try to find starting block from index itself */
@ -354,7 +306,7 @@ ext4_ext_max_entries(struct inode *inode, int depth)
static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
ext4_fsblk_t block = ext_pblock(ext);
ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
@ -363,7 +315,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
static int ext4_valid_extent_idx(struct inode *inode,
struct ext4_extent_idx *ext_idx)
{
ext4_fsblk_t block = idx_pblock(ext_idx);
ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
}
@ -463,13 +415,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
for (k = 0; k <= l; k++, path++) {
if (path->p_idx) {
ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
idx_pblock(path->p_idx));
ext4_idx_pblock(path->p_idx));
} else if (path->p_ext) {
ext_debug(" %d:[%d]%d:%llu ",
le32_to_cpu(path->p_ext->ee_block),
ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_len(path->p_ext),
ext_pblock(path->p_ext));
ext4_ext_pblock(path->p_ext));
} else
ext_debug(" []");
}
@ -494,7 +446,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
ext4_ext_is_uninitialized(ex),
ext4_ext_get_actual_len(ex), ext_pblock(ex));
ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
}
ext_debug("\n");
}
@ -545,7 +497,7 @@ ext4_ext_binsearch_idx(struct inode *inode,
path->p_idx = l - 1;
ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
idx_pblock(path->p_idx));
ext4_idx_pblock(path->p_idx));
#ifdef CHECK_BINSEARCH
{
@ -614,7 +566,7 @@ ext4_ext_binsearch(struct inode *inode,
path->p_ext = l - 1;
ext_debug(" -> %d:%llu:[%d]%d ",
le32_to_cpu(path->p_ext->ee_block),
ext_pblock(path->p_ext),
ext4_ext_pblock(path->p_ext),
ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_len(path->p_ext));
@ -682,7 +634,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
ext4_ext_binsearch_idx(inode, path + ppos, block);
path[ppos].p_block = idx_pblock(path[ppos].p_idx);
path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
path[ppos].p_depth = i;
path[ppos].p_ext = NULL;
@ -721,7 +673,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
ext4_ext_binsearch(inode, path + ppos, block);
/* if not an empty leaf */
if (path[ppos].p_ext)
path[ppos].p_block = ext_pblock(path[ppos].p_ext);
path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
ext4_ext_show_path(inode, path);
@ -739,9 +691,9 @@ err:
* insert new index [@logical;@ptr] into the block at @curp;
* check where to insert: before @curp or after @curp
*/
int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
struct ext4_ext_path *curp,
int logical, ext4_fsblk_t ptr)
static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
struct ext4_ext_path *curp,
int logical, ext4_fsblk_t ptr)
{
struct ext4_extent_idx *ix;
int len, err;
@ -917,7 +869,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
EXT_MAX_EXTENT(path[depth].p_hdr)) {
ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
le32_to_cpu(path[depth].p_ext->ee_block),
ext_pblock(path[depth].p_ext),
ext4_ext_pblock(path[depth].p_ext),
ext4_ext_is_uninitialized(path[depth].p_ext),
ext4_ext_get_actual_len(path[depth].p_ext),
newblock);
@ -1007,7 +959,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
ext_debug("%d: move %d:%llu in new index %llu\n", i,
le32_to_cpu(path[i].p_idx->ei_block),
idx_pblock(path[i].p_idx),
ext4_idx_pblock(path[i].p_idx),
newblock);
/*memmove(++fidx, path[i].p_idx++,
sizeof(struct ext4_extent_idx));
@ -1146,7 +1098,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
idx_pblock(EXT_FIRST_INDEX(neh)));
ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
neh->eh_depth = cpu_to_le16(path->p_depth + 1);
err = ext4_ext_dirty(handle, inode, curp);
@ -1232,9 +1184,9 @@ out:
* returns 0 at @phys
* return value contains 0 (success) or error code
*/
int
ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
ext4_lblk_t *logical, ext4_fsblk_t *phys)
static int ext4_ext_search_left(struct inode *inode,
struct ext4_ext_path *path,
ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
struct ext4_extent_idx *ix;
struct ext4_extent *ex;
@ -1286,7 +1238,7 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
}
*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
*phys = ext_pblock(ex) + ee_len - 1;
*phys = ext4_ext_pblock(ex) + ee_len - 1;
return 0;
}
@ -1297,9 +1249,9 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
* returns 0 at @phys
* return value contains 0 (success) or error code
*/
int
ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
ext4_lblk_t *logical, ext4_fsblk_t *phys)
static int ext4_ext_search_right(struct inode *inode,
struct ext4_ext_path *path,
ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
struct buffer_head *bh = NULL;
struct ext4_extent_header *eh;
@ -1342,7 +1294,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
}
}
*logical = le32_to_cpu(ex->ee_block);
*phys = ext_pblock(ex);
*phys = ext4_ext_pblock(ex);
return 0;
}
@ -1357,7 +1309,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
/* next allocated block in this leaf */
ex++;
*logical = le32_to_cpu(ex->ee_block);
*phys = ext_pblock(ex);
*phys = ext4_ext_pblock(ex);
return 0;
}
@ -1376,7 +1328,7 @@ got_index:
* follow it and find the closest allocated
* block to the right */
ix++;
block = idx_pblock(ix);
block = ext4_idx_pblock(ix);
while (++depth < path->p_depth) {
bh = sb_bread(inode->i_sb, block);
if (bh == NULL)
@ -1388,7 +1340,7 @@ got_index:
return -EIO;
}
ix = EXT_FIRST_INDEX(eh);
block = idx_pblock(ix);
block = ext4_idx_pblock(ix);
put_bh(bh);
}
@ -1402,7 +1354,7 @@ got_index:
}
ex = EXT_FIRST_EXTENT(eh);
*logical = le32_to_cpu(ex->ee_block);
*phys = ext_pblock(ex);
*phys = ext4_ext_pblock(ex);
put_bh(bh);
return 0;
}
@ -1573,7 +1525,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
return 0;
#endif
if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2))
if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
return 1;
return 0;
}
@ -1585,9 +1537,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
* Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
* 1 if they got merged.
*/
int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *ex)
static int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *ex)
{
struct ext4_extent_header *eh;
unsigned int depth, len;
@ -1632,9 +1584,9 @@ int ext4_ext_try_to_merge(struct inode *inode,
* such that there will be no overlap, and then returns 1.
* If there is no overlap found, it returns 0.
*/
unsigned int ext4_ext_check_overlap(struct inode *inode,
struct ext4_extent *newext,
struct ext4_ext_path *path)
static unsigned int ext4_ext_check_overlap(struct inode *inode,
struct ext4_extent *newext,
struct ext4_ext_path *path)
{
ext4_lblk_t b1, b2;
unsigned int depth, len1;
@ -1706,11 +1658,12 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
&& ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
le32_to_cpu(ex->ee_block),
ext4_ext_is_uninitialized(ex),
ext4_ext_get_actual_len(ex), ext_pblock(ex));
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
le32_to_cpu(ex->ee_block),
ext4_ext_is_uninitialized(ex),
ext4_ext_get_actual_len(ex),
ext4_ext_pblock(ex));
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
return err;
@ -1780,7 +1733,7 @@ has_space:
/* there is no extent in this leaf, create first one */
ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
le32_to_cpu(newext->ee_block),
ext_pblock(newext),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext));
path[depth].p_ext = EXT_FIRST_EXTENT(eh);
@ -1794,7 +1747,7 @@ has_space:
ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block),
ext_pblock(newext),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2);
@ -1808,7 +1761,7 @@ has_space:
ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block),
ext_pblock(newext),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2);
@ -1819,7 +1772,7 @@ has_space:
le16_add_cpu(&eh->eh_entries, 1);
nearex = path[depth].p_ext;
nearex->ee_block = newext->ee_block;
ext4_ext_store_pblock(nearex, ext_pblock(newext));
ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
nearex->ee_len = newext->ee_len;
merge:
@ -1845,9 +1798,9 @@ cleanup:
return err;
}
int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
ext4_lblk_t num, ext_prepare_callback func,
void *cbdata)
static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
ext4_lblk_t num, ext_prepare_callback func,
void *cbdata)
{
struct ext4_ext_path *path = NULL;
struct ext4_ext_cache cbex;
@ -1923,7 +1876,7 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
} else {
cbex.ec_block = le32_to_cpu(ex->ee_block);
cbex.ec_len = ext4_ext_get_actual_len(ex);
cbex.ec_start = ext_pblock(ex);
cbex.ec_start = ext4_ext_pblock(ex);
cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
}
@ -2073,7 +2026,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
/* free index block */
path--;
leaf = idx_pblock(path->p_idx);
leaf = ext4_idx_pblock(path->p_idx);
if (unlikely(path->p_hdr->eh_entries == 0)) {
EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
return -EIO;
@ -2181,7 +2134,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t start;
num = le32_to_cpu(ex->ee_block) + ee_len - from;
start = ext_pblock(ex) + ee_len - num;
start = ext4_ext_pblock(ex) + ee_len - num;
ext_debug("free last %u blocks starting %llu\n", num, start);
ext4_free_blocks(handle, inode, 0, start, num, flags);
} else if (from == le32_to_cpu(ex->ee_block)
@ -2310,7 +2263,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
goto out;
ext_debug("new extent: %u:%u:%llu\n", block, num,
ext_pblock(ex));
ext4_ext_pblock(ex));
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = ext4_ext_get_actual_len(ex);
@ -2421,9 +2374,9 @@ again:
struct buffer_head *bh;
/* go to the next level */
ext_debug("move to level %d (block %llu)\n",
i + 1, idx_pblock(path[i].p_idx));
i + 1, ext4_idx_pblock(path[i].p_idx));
memset(path + i + 1, 0, sizeof(*path));
bh = sb_bread(sb, idx_pblock(path[i].p_idx));
bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx));
if (!bh) {
/* should we reset i_size? */
err = -EIO;
@ -2535,77 +2488,21 @@ void ext4_ext_release(struct super_block *sb)
#endif
}
static void bi_complete(struct bio *bio, int error)
{
complete((struct completion *)bio->bi_private);
}
/* FIXME!! we need to try to merge to left or right after zero-out */
static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
ext4_fsblk_t ee_pblock;
unsigned int ee_len;
int ret;
struct bio *bio;
int blkbits, blocksize;
sector_t ee_pblock;
struct completion event;
unsigned int ee_len, len, done, offset;
blkbits = inode->i_blkbits;
blocksize = inode->i_sb->s_blocksize;
ee_len = ext4_ext_get_actual_len(ex);
ee_pblock = ext_pblock(ex);
ee_pblock = ext4_ext_pblock(ex);
/* convert ee_pblock to 512 byte sectors */
ee_pblock = ee_pblock << (blkbits - 9);
ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
if (ret > 0)
ret = 0;
while (ee_len > 0) {
if (ee_len > BIO_MAX_PAGES)
len = BIO_MAX_PAGES;
else
len = ee_len;
bio = bio_alloc(GFP_NOIO, len);
if (!bio)
return -ENOMEM;
bio->bi_sector = ee_pblock;
bio->bi_bdev = inode->i_sb->s_bdev;
done = 0;
offset = 0;
while (done < len) {
ret = bio_add_page(bio, ZERO_PAGE(0),
blocksize, offset);
if (ret != blocksize) {
/*
* We can't add any more pages because of
* hardware limitations. Start a new bio.
*/
break;
}
done++;
offset += blocksize;
if (offset >= PAGE_CACHE_SIZE)
offset = 0;
}
init_completion(&event);
bio->bi_private = &event;
bio->bi_end_io = bi_complete;
submit_bio(WRITE, bio);
wait_for_completion(&event);
if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
bio_put(bio);
return -EIO;
}
bio_put(bio);
ee_len -= done;
ee_pblock += done << (blkbits - 9);
}
return 0;
return ret;
}
#define EXT4_EXT_ZERO_LEN 7
@ -2651,12 +2548,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (map->m_lblk - ee_block);
newblock = map->m_lblk - ee_block + ext_pblock(ex);
newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
/*
* It is safe to convert extent to initialized via explicit
@ -2675,7 +2572,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
return allocated;
@ -2710,7 +2607,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex->ee_block = orig_ex.ee_block;
ex->ee_len = cpu_to_le16(ee_len - allocated);
ext4_ext_mark_uninitialized(ex);
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
ex3 = &newex;
@ -2725,7 +2622,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto fix_extent_len;
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_store_pblock(ex,
ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* blocks available from map->m_lblk */
return allocated;
@ -2782,7 +2680,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
/* blocks available from map->m_lblk */
@ -2833,7 +2731,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
/* blocks available from map->m_lblk */
@ -2902,7 +2800,7 @@ insert:
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
return allocated;
@ -2915,7 +2813,7 @@ out:
fix_extent_len:
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_mark_uninitialized(ex);
ext4_ext_dirty(handle, inode, path + depth);
return err;
@ -2973,12 +2871,12 @@ static int ext4_split_unwritten_extents(handle_t *handle,
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (map->m_lblk - ee_block);
newblock = map->m_lblk - ee_block + ext_pblock(ex);
newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
/*
* It is safe to convert extent to initialized via explicit
@ -3027,7 +2925,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
/* blocks available from map->m_lblk */
@ -3099,7 +2997,7 @@ insert:
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
return allocated;
@ -3112,7 +3010,7 @@ out:
fix_extent_len:
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_mark_uninitialized(ex);
ext4_ext_dirty(handle, inode, path + depth);
return err;
@ -3180,6 +3078,57 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
unmap_underlying_metadata(bdev, block + i);
}
/*
 * check_eofblocks_fl - clear EXT4_INODE_EOFBLOCKS when it no longer applies
 *
 * @handle: journal handle, passed on to ext4_mark_inode_dirty()
 * @inode:  inode whose EOFBLOCKS flag is examined
 * @map:    mapping request; only map->m_lblk is read here
 * @path:   extent path; path[ext_depth(inode)] is the leaf level
 * @len:    number of blocks, counted from map->m_lblk
 *
 * Returns 0 if the flag is not set or has to stay set, -EIO if the flag
 * is set but the leaf header has no entries, and otherwise the result of
 * ext4_mark_inode_dirty() after the flag has been cleared.
 */
static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
			      struct ext4_map_blocks *map,
			      struct ext4_ext_path *path,
			      unsigned int len)
{
	int i, depth;
	struct ext4_extent_header *eh;
	struct ext4_extent *last_ex;

	/* Nothing to do unless the flag is currently set. */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
		return 0;

	depth = ext_depth(inode);
	eh = path[depth].p_hdr;

	if (unlikely(!eh->eh_entries)) {
		EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
				 "EOFBLOCKS_FL set");
		return -EIO;
	}
	last_ex = EXT_LAST_EXTENT(eh);

	/*
	 * The flag is only cleared when the request reaches the end of
	 * the last extent in this leaf.  If the range
	 * [m_lblk, m_lblk + len) stops short of the end of that extent,
	 * the caller (ext4_ext_map_blocks() and friends) is not touching
	 * the last block, so bail out immediately.
	 */
	if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) +
	    ext4_ext_get_actual_len(last_ex))
		return 0;

	/*
	 * The request reaches the end of the leaf's last extent.  That
	 * extent is the last one in the whole file only if the leaf was
	 * reached through the rightmost index at every level of the
	 * tree; any other index means extents exist further right.
	 */
	for (i = depth - 1; i >= 0; i--)
		if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
			return 0;

	ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
	return ext4_mark_inode_dirty(handle, inode);
}
static int
ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
@ -3206,7 +3155,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
* completed
*/
if (io)
io->flag = EXT4_IO_UNWRITTEN;
io->flag = EXT4_IO_END_UNWRITTEN;
else
ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
if (ext4_should_dioread_nolock(inode))
@ -3217,8 +3166,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
ret = ext4_convert_unwritten_extents_endio(handle, inode,
path);
if (ret >= 0)
if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
err = check_eofblocks_fl(handle, inode, map, path,
map->m_len);
} else
err = ret;
goto out2;
}
/* buffered IO case */
@ -3244,8 +3197,13 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
/* buffered write, writepage time, convert*/
ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
if (ret >= 0)
if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
err = check_eofblocks_fl(handle, inode, map, path, map->m_len);
if (err < 0)
goto out2;
}
out:
if (ret <= 0) {
err = ret;
@ -3292,6 +3250,7 @@ out2:
}
return err ? err : allocated;
}
/*
* Block allocation/map/preallocation routine for extents based files
*
@ -3315,9 +3274,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
{
struct ext4_ext_path *path = NULL;
struct ext4_extent_header *eh;
struct ext4_extent newex, *ex, *last_ex;
struct ext4_extent newex, *ex;
ext4_fsblk_t newblock;
int i, err = 0, depth, ret, cache_type;
int err = 0, depth, ret, cache_type;
unsigned int allocated = 0;
struct ext4_allocation_request ar;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
@ -3341,7 +3300,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
/* block is already allocated */
newblock = map->m_lblk
- le32_to_cpu(newex.ee_block)
+ ext_pblock(&newex);
+ ext4_ext_pblock(&newex);
/* number of remaining blocks in the extent */
allocated = ext4_ext_get_actual_len(&newex) -
(map->m_lblk - le32_to_cpu(newex.ee_block));
@ -3379,7 +3338,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ex = path[depth].p_ext;
if (ex) {
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
ext4_fsblk_t ee_start = ext_pblock(ex);
ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
unsigned short ee_len;
/*
@ -3488,7 +3447,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
*/
if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
if (io)
io->flag = EXT4_IO_UNWRITTEN;
io->flag = EXT4_IO_END_UNWRITTEN;
else
ext4_set_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN);
@ -3497,44 +3456,23 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
map->m_flags |= EXT4_MAP_UNINIT;
}
if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) {
if (unlikely(!eh->eh_entries)) {
EXT4_ERROR_INODE(inode,
"eh->eh_entries == 0 and "
"EOFBLOCKS_FL set");
err = -EIO;
goto out2;
}
last_ex = EXT_LAST_EXTENT(eh);
/*
* If the current leaf block was reached by looking at
* the last index block all the way down the tree, and
* we are extending the inode beyond the last extent
* in the current leaf block, then clear the
* EOFBLOCKS_FL flag.
*/
for (i = depth-1; i >= 0; i--) {
if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
break;
}
if ((i < 0) &&
(map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) +
ext4_ext_get_actual_len(last_ex)))
ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
}
err = check_eofblocks_fl(handle, inode, map, path, ar.len);
if (err)
goto out2;
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err) {
/* free data blocks we just allocated */
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
ext4_ext_get_actual_len(&newex), 0);
goto out2;
}
/* previous routine could use block we allocated */
newblock = ext_pblock(&newex);
newblock = ext4_ext_pblock(&newex);
allocated = ext4_ext_get_actual_len(&newex);
if (allocated > map->m_len)
allocated = map->m_len;
@ -3729,7 +3667,7 @@ retry:
printk(KERN_ERR "%s: ext4_ext_map_blocks "
"returned error inode#%lu, block=%u, "
"max_blocks=%u", __func__,
inode->i_ino, block, max_blocks);
inode->i_ino, map.m_lblk, max_blocks);
#endif
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);

View file

@ -130,8 +130,50 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
return dquot_file_open(inode, filp);
}
/*
 * ext4_llseek() is a copy of generic_file_llseek() whose only intended
 * difference is the upper bound on the resulting offset: files without
 * EXT4_INODE_EXTENTS are limited by s_bitmap_maxbytes, extent-mapped
 * files by the superblock's s_maxbytes.
 *
 * Returns the new file position, or -EINVAL if the resulting offset is
 * negative or exceeds that bound.
 */
loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
{
	struct inode *inode = file->f_mapping->host;
	loff_t limit;

	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		limit = inode->i_sb->s_maxbytes;
	else
		limit = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;

	mutex_lock(&inode->i_mutex);

	if (origin == SEEK_END) {
		offset += inode->i_size;
	} else if (origin == SEEK_CUR) {
		/* A zero relative seek only queries the current position. */
		if (offset == 0) {
			mutex_unlock(&inode->i_mutex);
			return file->f_pos;
		}
		offset += file->f_pos;
	}
	/* Any other origin leaves offset as passed in by the caller. */

	if (offset < 0 || offset > limit) {
		mutex_unlock(&inode->i_mutex);
		return -EINVAL;
	}

	/* Touch f_pos/f_version only when the position really moves. */
	if (offset != file->f_pos) {
		file->f_pos = offset;
		file->f_version = 0;
	}
	mutex_unlock(&inode->i_mutex);

	return offset;
}
const struct file_operations ext4_file_operations = {
.llseek = generic_file_llseek,
.llseek = ext4_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,

View file

@ -34,6 +34,89 @@
#include <trace/events/ext4.h>
/*
 * Debug helper: print every entry of the inode's i_completed_io_list
 * together with its list neighbours.  The body is compiled only when
 * EXT4_DEBUG is defined; otherwise the function is empty.
 */
static void dump_completed_IO(struct inode * inode)
{
#ifdef EXT4_DEBUG
	ext4_io_end_t *entry, *prev_io, *next_io;
	unsigned long irq_flags;

	if (list_empty(&EXT4_I(inode)->i_completed_io_list)) {
		ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
		return;
	}

	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);

	/* Walk the list under its lock. */
	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, irq_flags);
	list_for_each_entry(entry, &EXT4_I(inode)->i_completed_io_list, list) {
		/*
		 * Neighbours are derived from the raw list links, so for
		 * the first/last entry they are computed from the list
		 * head itself; they are only printed, never dereferenced.
		 */
		prev_io = container_of(entry->list.prev, ext4_io_end_t, list);
		next_io = container_of(entry->list.next, ext4_io_end_t, list);
		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
			    entry, inode->i_ino, prev_io, next_io);
	}
	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, irq_flags);
#endif
}
/*
* This function is called from ext4_sync_file().
*
* When IO is completed, the work to convert unwritten extents to
* written is queued on workqueue but may not get immediately
* scheduled. When fsync is called, we need to ensure the
* conversion is complete before fsync returns.
* The inode keeps track of a list of pending/completed IO that
* might needs to do the conversion. This function walks through
* the list and convert the related unwritten extents for completed IO
* to written.
* The function return the number of pending IOs on success.
*/
static int flush_completed_IO(struct inode *inode)
{
ext4_io_end_t *io;
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned long flags;
int ret = 0;
int ret2 = 0;
if (list_empty(&ei->i_completed_io_list))
return ret;
dump_completed_IO(inode);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
while (!list_empty(&ei->i_completed_io_list)){
io = list_entry(ei->i_completed_io_list.next,
ext4_io_end_t, list);
/*
* Calling ext4_end_io_nolock() to convert completed
* IO to written.
*
* When ext4_sync_file() is called, run_queue() may already
* about to flush the work corresponding to this io structure.
* It will be upset if it founds the io structure related
* to the work-to-be schedule is freed.
*
* Thus we need to keep the io structure still valid here after
* convertion finished. The io structure has a flag to
* avoid double converting from both fsync and background work
* queue work.
*/
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
ret = ext4_end_io_nolock(io);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
if (ret < 0)
ret2 = ret;
else
list_del_init(&io->list);
}
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
return (ret2 < 0) ? ret2 : 0;
}
/*
* If we're not journaling and this is a just-created file, we have to
* sync our parent directory (if it was freshly created) since

View file

@ -50,7 +50,7 @@
* need to use it within a single byte (to ensure we get endianness right).
* We can use memset for the rest of the bitmap as there are no other users.
*/
void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
{
int i;
@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
}
/* Initializes an uninitialized inode bitmap */
unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
static unsigned ext4_init_inode_bitmap(struct super_block *sb,
struct buffer_head *bh,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
}
memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);
return EXT4_INODES_PER_GROUP(sb);
@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
return NULL;
bitmap_blk = ext4_inode_bitmap(sb, desc);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
unlock_buffer(bh);
return bh;
}
ext4_lock_group(sb, block_group);
if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
ext4_init_inode_bitmap(sb, bh, block_group, desc);
@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
return bh;
}
ext4_unlock_group(sb, block_group);
if (buffer_uptodate(bh)) {
/*
* if not uninit if bh is uptodate,
@ -411,8 +415,8 @@ struct orlov_stats {
* for a particular block group or flex_bg. If flex_size is 1, then g
* is a block group number; otherwise it is flex_bg number.
*/
void get_orlov_stats(struct super_block *sb, ext4_group_t g,
int flex_size, struct orlov_stats *stats)
static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
int flex_size, struct orlov_stats *stats)
{
struct ext4_group_desc *desc;
struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb,
{
int free = 0, retval = 0, count;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
/*
* We have to be sure that new inode allocation does not race with
* inode table initialization, because otherwise we may end up
* allocating and writing new inode right before sb_issue_zeroout
* takes place and overwriting our new inode with zeroes. So we
* take alloc_sem to prevent it.
*/
down_read(&grp->alloc_sem);
ext4_lock_group(sb, group);
if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
/* not a free inode */
@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb,
if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
ino > EXT4_INODES_PER_GROUP(sb)) {
ext4_unlock_group(sb, group);
up_read(&grp->alloc_sem);
ext4_error(sb, "reserved inode or inode > inodes count - "
"block_group = %u, inode=%lu", group,
ino + group * EXT4_INODES_PER_GROUP(sb));
@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb,
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
err_ret:
ext4_unlock_group(sb, group);
up_read(&grp->alloc_sem);
return retval;
}
@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb)
}
return count;
}
/*
 * Zero the part of a group's inode table that has not been zeroed yet by
 * writing zeroes over it, then mark the group EXT4_BG_INODE_ZEROED.
 *
 * Must be called without any spinlock held.  On an active filesystem the
 * only caller is the ext4lazyinit thread, so no special locking is needed
 * for the ZEROED flag itself.  Inode allocation from this group must be
 * held off while the table is being zeroed, which is done by taking
 * alloc_sem for writing; ext4_claim_inode() takes it for reading.
 *
 * @sb:      superblock of the filesystem
 * @group:   block group whose inode table is to be zeroed
 * @barrier: if non-zero, issue a device flush after the zeroout
 *
 * Returns 0 on success and when there is nothing to do (group already
 * zeroed, or the group descriptor could not be obtained), 1 if the
 * filesystem is read-only or the group's counters are inconsistent, and
 * otherwise the error returned by ext4_journal_start_sb(),
 * ext4_journal_get_write_access(), sb_issue_zeroout() or
 * ext4_handle_dirty_metadata().
 */
extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
				 int barrier)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_desc *gdp = NULL;
	struct buffer_head *group_desc_bh;
	handle_t *handle;
	ext4_fsblk_t blk;
	int num, ret = 0, used_blks = 0;

	/* This should not happen, but just to be sure check this */
	if (sb->s_flags & MS_RDONLY) {
		ret = 1;
		goto out;
	}

	gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
	if (!gdp)
		goto out;

	/*
	 * We do not need to lock this, because we are the only one
	 * handling this flag.
	 */
	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
		goto out;

	handle = ext4_journal_start_sb(sb, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out;
	}

	/*
	 * From here on both the journal handle and alloc_sem are held;
	 * every exit has to go through err_out to release them.
	 */
	down_write(&grp->alloc_sem);

	/*
	 * If inode bitmap was already initialized there may be some
	 * used inodes so we need to skip blocks with used inodes in
	 * inode table.
	 */
	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
		used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
					  ext4_itable_unused_count(sb, gdp)),
					 sbi->s_inodes_per_block);

	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
		ext4_error(sb, "Something is wrong with group %u\n"
			   "Used itable blocks: %d\n"
			   "itable unused count: %u\n",
			   group, used_blks,
			   ext4_itable_unused_count(sb, gdp));
		ret = 1;
		/* Must not skip the unlock / journal stop below. */
		goto err_out;
	}

	/* Zero only the blocks past those that may hold used inodes. */
	blk = ext4_inode_table(sb, gdp) + used_blks;
	num = sbi->s_itb_per_group - used_blks;

	BUFFER_TRACE(group_desc_bh, "get_write_access");
	ret = ext4_journal_get_write_access(handle,
					    group_desc_bh);
	if (ret)
		goto err_out;

	/*
	 * Skip zeroout if the inode table is full. But we set the ZEROED
	 * flag anyway, because obviously, when it is full it does not need
	 * further zeroing.
	 */
	if (unlikely(num == 0))
		goto skip_zeroout;

	ext4_debug("going to zero out inode table in group %d\n",
		   group);
	ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
	if (ret < 0)
		goto err_out;
	if (barrier)
		blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);

skip_zeroout:
	/* Flag and checksum are updated together under the group lock. */
	ext4_lock_group(sb, group);
	gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
	ext4_unlock_group(sb, group);

	BUFFER_TRACE(group_desc_bh,
		     "call ext4_handle_dirty_metadata");
	ret = ext4_handle_dirty_metadata(handle, NULL,
					 group_desc_bh);

err_out:
	up_write(&grp->alloc_sem);
	ext4_journal_stop(handle);
out:
	return ret;
}

View file

@ -60,6 +60,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
}
static void ext4_invalidatepage(struct page *page, unsigned long offset);
static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
static int __ext4_journalled_writepage(struct page *page, unsigned int len);
static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
/*
* Test whether an inode is a fast symlink.
@ -755,6 +761,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
* parent to disk.
*/
bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
if (unlikely(!bh)) {
err = -EIO;
goto failed;
}
branch[n].bh = bh;
lock_buffer(bh);
BUFFER_TRACE(bh, "call get_create_access");
@ -1207,8 +1218,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
break;
idx++;
num++;
if (num >= max_pages)
if (num >= max_pages) {
done = 1;
break;
}
}
pagevec_release(&pvec);
}
@ -1995,16 +2008,23 @@ static void ext4_da_page_release_reservation(struct page *page,
*
* As pages are already locked by write_cache_pages(), we can't use it
*/
static int mpage_da_submit_io(struct mpage_da_data *mpd)
static int mpage_da_submit_io(struct mpage_da_data *mpd,
struct ext4_map_blocks *map)
{
long pages_skipped;
struct pagevec pvec;
unsigned long index, end;
int ret = 0, err, nr_pages, i;
struct inode *inode = mpd->inode;
struct address_space *mapping = inode->i_mapping;
loff_t size = i_size_read(inode);
unsigned int len, block_start;
struct buffer_head *bh, *page_bufs = NULL;
int journal_data = ext4_should_journal_data(inode);
sector_t pblock = 0, cur_logical = 0;
struct ext4_io_submit io_submit;
BUG_ON(mpd->next_page <= mpd->first_page);
memset(&io_submit, 0, sizeof(io_submit));
/*
* We need to start from the first_page to the next_page - 1
* to make sure we also write the mapped dirty buffer_heads.
@ -2020,122 +2040,108 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
int commit_write = 0, redirty_page = 0;
struct page *page = pvec.pages[i];
index = page->index;
if (index > end)
break;
if (index == size >> PAGE_CACHE_SHIFT)
len = size & ~PAGE_CACHE_MASK;
else
len = PAGE_CACHE_SIZE;
if (map) {
cur_logical = index << (PAGE_CACHE_SHIFT -
inode->i_blkbits);
pblock = map->m_pblk + (cur_logical -
map->m_lblk);
}
index++;
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
pages_skipped = mpd->wbc->pages_skipped;
err = mapping->a_ops->writepage(page, mpd->wbc);
if (!err && (pages_skipped == mpd->wbc->pages_skipped))
/*
* have successfully written the page
* without skipping the same
*/
/*
* If the page does not have buffers (for
* whatever reason), try to create them using
* __block_write_begin. If this fails,
* redirty the page and move on.
*/
if (!page_has_buffers(page)) {
if (__block_write_begin(page, 0, len,
noalloc_get_block_write)) {
redirty_page:
redirty_page_for_writepage(mpd->wbc,
page);
unlock_page(page);
continue;
}
commit_write = 1;
}
bh = page_bufs = page_buffers(page);
block_start = 0;
do {
if (!bh)
goto redirty_page;
if (map && (cur_logical >= map->m_lblk) &&
(cur_logical <= (map->m_lblk +
(map->m_len - 1)))) {
if (buffer_delay(bh)) {
clear_buffer_delay(bh);
bh->b_blocknr = pblock;
}
if (buffer_unwritten(bh) ||
buffer_mapped(bh))
BUG_ON(bh->b_blocknr != pblock);
if (map->m_flags & EXT4_MAP_UNINIT)
set_buffer_uninit(bh);
clear_buffer_unwritten(bh);
}
/* redirty page if block allocation undone */
if (buffer_delay(bh) || buffer_unwritten(bh))
redirty_page = 1;
bh = bh->b_this_page;
block_start += bh->b_size;
cur_logical++;
pblock++;
} while (bh != page_bufs);
if (redirty_page)
goto redirty_page;
if (commit_write)
/* mark the buffer_heads as dirty & uptodate */
block_commit_write(page, 0, len);
/*
* Delalloc doesn't support data journalling,
* but eventually maybe we'll lift this
* restriction.
*/
if (unlikely(journal_data && PageChecked(page)))
err = __ext4_journalled_writepage(page, len);
else
err = ext4_bio_write_page(&io_submit, page,
len, mpd->wbc);
if (!err)
mpd->pages_written++;
/*
* In error case, we have to continue because
* remaining pages are still locked
* XXX: unlock and re-dirty them?
*/
if (ret == 0)
ret = err;
}
pagevec_release(&pvec);
}
ext4_io_submit(&io_submit);
return ret;
}
/*
 * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
 *
 * @mpd: supplies the inode (mpd->inode) whose page cache is walked
 * @map: mapping result; m_lblk, m_len, m_pblk and m_flags are read
 *
 * Goes through the pages covering the logical range
 * [map->m_lblk, map->m_lblk + map->m_len) and stores the physical block
 * number into each delayed buffer head in that range, dropping BH_Delay
 * and BH_Unwritten on the way.  Buffers that already carry a block number
 * are checked against the expected one with BUG_ON().
 */
static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
struct ext4_map_blocks *map)
{
struct inode *inode = mpd->inode;
struct address_space *mapping = inode->i_mapping;
int blocks = map->m_len;
sector_t pblock = map->m_pblk, cur_logical;
struct buffer_head *head, *bh;
pgoff_t index, end;
struct pagevec pvec;
int nr_pages, i;
/* Page indices of the first and last page touched by the mapping. */
index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
/* Logical block number of the first buffer in the first page. */
cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
pagevec_init(&pvec, 0);
while (index <= end) {
/* XXX: optimize tail */
nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
/* Stop at the first page found beyond the mapped range. */
index = page->index;
if (index > end)
break;
index++;
/* Pages must be locked, not under writeback, and have buffers. */
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
BUG_ON(!page_has_buffers(page));
bh = page_buffers(page);
head = bh;
/* skip blocks out of the range */
do {
if (cur_logical >= map->m_lblk)
break;
cur_logical++;
} while ((bh = bh->b_this_page) != head);
/*
 * Continue from the buffer the skip loop stopped at; cur_logical
 * and pblock advance together, one buffer at a time.
 */
do {
if (cur_logical >= map->m_lblk + blocks)
break;
if (buffer_delay(bh) || buffer_unwritten(bh)) {
BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
if (buffer_delay(bh)) {
/* Delayed buffer: record the newly mapped block number. */
clear_buffer_delay(bh);
bh->b_blocknr = pblock;
} else {
/*
 * An unwritten buffer should already have a block
 * number assigned; verify that it matches.
 */
clear_buffer_unwritten(bh);
BUG_ON(bh->b_blocknr != pblock);
}
} else if (buffer_mapped(bh))
BUG_ON(bh->b_blocknr != pblock);
/* Propagate the mapping's UNINIT flag to the buffer. */
if (map->m_flags & EXT4_MAP_UNINIT)
set_buffer_uninit(bh);
cur_logical++;
pblock++;
} while ((bh = bh->b_this_page) != head);
}
pagevec_release(&pvec);
}
}
static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
sector_t logical, long blk_cnt)
{
@ -2187,35 +2193,32 @@ static void ext4_print_free_blocks(struct inode *inode)
}
/*
* mpage_da_map_blocks - go through given space
* mpage_da_map_and_submit - go through given space, map them
* if necessary, and then submit them for I/O
*
* @mpd - bh describing space
*
* The function skips space we know is already mapped to disk blocks.
*
*/
static int mpage_da_map_blocks(struct mpage_da_data *mpd)
static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
{
int err, blks, get_blocks_flags;
struct ext4_map_blocks map;
struct ext4_map_blocks map, *mapp = NULL;
sector_t next = mpd->b_blocknr;
unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
handle_t *handle = NULL;
/*
* We consider only non-mapped and non-allocated blocks
* If the blocks are mapped already, or we couldn't accumulate
* any blocks, then proceed immediately to the submission stage.
*/
if ((mpd->b_state & (1 << BH_Mapped)) &&
!(mpd->b_state & (1 << BH_Delay)) &&
!(mpd->b_state & (1 << BH_Unwritten)))
return 0;
/*
* If we didn't accumulate anything to write simply return
*/
if (!mpd->b_size)
return 0;
if ((mpd->b_size == 0) ||
((mpd->b_state & (1 << BH_Mapped)) &&
!(mpd->b_state & (1 << BH_Delay)) &&
!(mpd->b_state & (1 << BH_Unwritten))))
goto submit_io;
handle = ext4_journal_current_handle();
BUG_ON(!handle);
@ -2252,17 +2255,18 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
err = blks;
/*
* If get block returns with error we simply
* return. Later writepage will redirty the page and
* writepages will find the dirty page again
* If get block returns EAGAIN or ENOSPC and there
* appears to be free blocks we will call
* ext4_writepage() for all of the pages which will
* just redirty the pages.
*/
if (err == -EAGAIN)
return 0;
goto submit_io;
if (err == -ENOSPC &&
ext4_count_free_blocks(sb)) {
mpd->retval = err;
return 0;
goto submit_io;
}
/*
@ -2287,10 +2291,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
/* invalidate all the pages */
ext4_da_block_invalidatepages(mpd, next,
mpd->b_size >> mpd->inode->i_blkbits);
return err;
return;
}
BUG_ON(blks == 0);
mapp = &map;
if (map.m_flags & EXT4_MAP_NEW) {
struct block_device *bdev = mpd->inode->i_sb->s_bdev;
int i;
@ -2299,18 +2304,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
unmap_underlying_metadata(bdev, map.m_pblk + i);
}
/*
* If blocks are delayed marked, we need to
* put actual blocknr and drop delayed bit
*/
if ((mpd->b_state & (1 << BH_Delay)) ||
(mpd->b_state & (1 << BH_Unwritten)))
mpage_put_bnr_to_bhs(mpd, &map);
if (ext4_should_order_data(mpd->inode)) {
err = ext4_jbd2_file_inode(handle, mpd->inode);
if (err)
return err;
/* This only happens if the journal is aborted */
return;
}
/*
@ -2321,10 +2319,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
disksize = i_size_read(mpd->inode);
if (disksize > EXT4_I(mpd->inode)->i_disksize) {
ext4_update_i_disksize(mpd->inode, disksize);
return ext4_mark_inode_dirty(handle, mpd->inode);
err = ext4_mark_inode_dirty(handle, mpd->inode);
if (err)
ext4_error(mpd->inode->i_sb,
"Failed to mark inode %lu dirty",
mpd->inode->i_ino);
}
return 0;
submit_io:
mpage_da_submit_io(mpd, mapp);
mpd->io_done = 1;
}
#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@ -2401,9 +2405,7 @@ flush_it:
* We couldn't merge the block to our extent, so we
* need to flush current extent and start new one
*/
if (mpage_da_map_blocks(mpd) == 0)
mpage_da_submit_io(mpd);
mpd->io_done = 1;
mpage_da_map_and_submit(mpd);
return;
}
@ -2422,9 +2424,9 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
* The function finds extents of pages and scan them for all blocks.
*/
static int __mpage_da_writepage(struct page *page,
struct writeback_control *wbc, void *data)
struct writeback_control *wbc,
struct mpage_da_data *mpd)
{
struct mpage_da_data *mpd = data;
struct inode *inode = mpd->inode;
struct buffer_head *bh, *head;
sector_t logical;
@ -2435,15 +2437,13 @@ static int __mpage_da_writepage(struct page *page,
if (mpd->next_page != page->index) {
/*
* Nope, we can't. So, we map non-allocated blocks
* and start IO on them using writepage()
* and start IO on them
*/
if (mpd->next_page != mpd->first_page) {
if (mpage_da_map_blocks(mpd) == 0)
mpage_da_submit_io(mpd);
mpage_da_map_and_submit(mpd);
/*
* skip rest of the page in the page_vec
*/
mpd->io_done = 1;
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return MPAGE_DA_EXTENT_TAIL;
@ -2622,6 +2622,7 @@ static int __ext4_journalled_writepage(struct page *page,
int ret = 0;
int err;
ClearPageChecked(page);
page_bufs = page_buffers(page);
BUG_ON(!page_bufs);
walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
@ -2699,7 +2700,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
static int ext4_writepage(struct page *page,
struct writeback_control *wbc)
{
int ret = 0;
int ret = 0, commit_write = 0;
loff_t size;
unsigned int len;
struct buffer_head *page_bufs = NULL;
@ -2712,71 +2713,46 @@ static int ext4_writepage(struct page *page,
else
len = PAGE_CACHE_SIZE;
if (page_has_buffers(page)) {
page_bufs = page_buffers(page);
if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
ext4_bh_delay_or_unwritten)) {
/*
* We don't want to do block allocation
* So redirty the page and return
* We may reach here when we do a journal commit
* via journal_submit_inode_data_buffers.
* If we don't have mapping block we just ignore
* them. We can also reach here via shrink_page_list
*/
/*
* If the page does not have buffers (for whatever reason),
* try to create them using __block_write_begin. If this
* fails, redirty the page and move on.
*/
if (!page_buffers(page)) {
if (__block_write_begin(page, 0, len,
noalloc_get_block_write)) {
redirty_page:
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
}
} else {
commit_write = 1;
}
page_bufs = page_buffers(page);
if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
ext4_bh_delay_or_unwritten)) {
/*
* The test for page_has_buffers() is subtle:
* We know the page is dirty but it lost buffers. That means
* that at some moment in time after write_begin()/write_end()
* has been called all buffers have been clean and thus they
* must have been written at least once. So they are all
* mapped and we can happily proceed with mapping them
* and writing the page.
*
* Try to initialize the buffer_heads and check whether
* all are mapped and non delay. We don't want to
* do block allocation here.
* We don't want to do block allocation, so redirty the
* page and return.  We may reach here when we do a
* journal commit via
* journal_submit_inode_data_buffers.  If we don't
* have mapping block we just ignore them.  We can also
* reach here via shrink_page_list.
*/
ret = __block_write_begin(page, 0, len,
noalloc_get_block_write);
if (!ret) {
page_bufs = page_buffers(page);
/* check whether all are mapped and non delay */
if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
ext4_bh_delay_or_unwritten)) {
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
}
} else {
/*
* We can't do block allocation here
* so just redity the page and unlock
* and return
*/
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
}
goto redirty_page;
}
if (commit_write)
/* now mark the buffer_heads as dirty and uptodate */
block_commit_write(page, 0, len);
}
if (PageChecked(page) && ext4_should_journal_data(inode)) {
if (PageChecked(page) && ext4_should_journal_data(inode))
/*
* It's mmapped pagecache. Add buffers and journal it. There
* doesn't seem much point in redirtying the page here.
*/
ClearPageChecked(page);
return __ext4_journalled_writepage(page, len);
}
if (page_bufs && buffer_uninit(page_bufs)) {
if (buffer_uninit(page_bufs)) {
ext4_set_bh_endio(page_bufs, inode);
ret = block_write_full_page_endio(page, noalloc_get_block_write,
wbc, ext4_end_io_buffer_write);
@ -2823,25 +2799,32 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
*/
static int write_cache_pages_da(struct address_space *mapping,
struct writeback_control *wbc,
struct mpage_da_data *mpd)
struct mpage_da_data *mpd,
pgoff_t *done_index)
{
int ret = 0;
int done = 0;
struct pagevec pvec;
int nr_pages;
unsigned nr_pages;
pgoff_t index;
pgoff_t end; /* Inclusive */
long nr_to_write = wbc->nr_to_write;
int tag;
pagevec_init(&pvec, 0);
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
if (wbc->sync_mode == WB_SYNC_ALL)
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
*done_index = index;
while (!done && (index <= end)) {
int i;
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_DIRTY,
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
break;
@ -2861,6 +2844,8 @@ static int write_cache_pages_da(struct address_space *mapping,
break;
}
*done_index = page->index + 1;
lock_page(page);
/*
@ -2946,6 +2931,8 @@ static int ext4_da_writepages(struct address_space *mapping,
long desired_nr_to_write, nr_to_writebump = 0;
loff_t range_start = wbc->range_start;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
pgoff_t done_index = 0;
pgoff_t end;
trace_ext4_da_writepages(inode, wbc);
@ -2981,8 +2968,11 @@ static int ext4_da_writepages(struct address_space *mapping,
wbc->range_start = index << PAGE_CACHE_SHIFT;
wbc->range_end = LLONG_MAX;
wbc->range_cyclic = 0;
} else
end = -1;
} else {
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
}
/*
* This works around two forms of stupidity. The first is in
@ -3001,9 +2991,12 @@ static int ext4_da_writepages(struct address_space *mapping,
* sbi->max_writeback_mb_bump whichever is smaller.
*/
max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
if (!range_cyclic && range_whole)
desired_nr_to_write = wbc->nr_to_write * 8;
else
if (!range_cyclic && range_whole) {
if (wbc->nr_to_write == LONG_MAX)
desired_nr_to_write = wbc->nr_to_write;
else
desired_nr_to_write = wbc->nr_to_write * 8;
} else
desired_nr_to_write = ext4_num_dirty_pages(inode, index,
max_pages);
if (desired_nr_to_write > max_pages)
@ -3020,6 +3013,9 @@ static int ext4_da_writepages(struct address_space *mapping,
pages_skipped = wbc->pages_skipped;
retry:
if (wbc->sync_mode == WB_SYNC_ALL)
tag_pages_for_writeback(mapping, index, end);
while (!ret && wbc->nr_to_write > 0) {
/*
@ -3058,16 +3054,14 @@ retry:
mpd.io_done = 0;
mpd.pages_written = 0;
mpd.retval = 0;
ret = write_cache_pages_da(mapping, wbc, &mpd);
ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
/*
* If we have a contiguous extent of pages and we
* haven't done the I/O yet, map the blocks and submit
* them for I/O.
*/
if (!mpd.io_done && mpd.next_page != mpd.first_page) {
if (mpage_da_map_blocks(&mpd) == 0)
mpage_da_submit_io(&mpd);
mpd.io_done = 1;
mpage_da_map_and_submit(&mpd);
ret = MPAGE_DA_EXTENT_TAIL;
}
trace_ext4_da_write_pages(inode, &mpd);
@ -3114,14 +3108,13 @@ retry:
__func__, wbc->nr_to_write, ret);
/* Update index */
index += pages_written;
wbc->range_cyclic = range_cyclic;
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
/*
* set the writeback_index so that range_cyclic
* mode will write it back later
*/
mapping->writeback_index = index;
mapping->writeback_index = done_index;
out_writepages:
wbc->nr_to_write -= nr_to_writebump;
@ -3456,15 +3449,6 @@ ext4_readpages(struct file *file, struct address_space *mapping,
return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
}
/*
 * Tear down an io_end descriptor: put the page it references (if any),
 * iput() its inode and kfree() the descriptor.  Passing NULL is a bug.
 */
static void ext4_free_io_end(ext4_io_end_t *io)
{
	struct page *held_page;

	BUG_ON(!io);

	held_page = io->page;
	if (held_page != NULL)
		put_page(held_page);

	iput(io->inode);
	kfree(io);
}
static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
{
struct buffer_head *head, *bh;
@ -3641,173 +3625,6 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
EXT4_GET_BLOCKS_IO_CREATE_EXT);
}
/*
 * Debug helper: print every entry on the inode's completed-IO list together
 * with its list neighbours.  Compiles to an empty function unless
 * EXT4_DEBUG is defined.
 */
static void dump_completed_IO(struct inode * inode)
{
#ifdef EXT4_DEBUG
	struct ext4_inode_info *ei = EXT4_I(inode);
	ext4_io_end_t *entry;
	unsigned long irq_flags;

	/* Unlocked peek, same as before: nothing to print for an empty list. */
	if (list_empty(&ei->i_completed_io_list)) {
		ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
		return;
	}

	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);

	spin_lock_irqsave(&ei->i_completed_io_lock, irq_flags);
	list_for_each_entry(entry, &ei->i_completed_io_list, list) {
		/* Neighbours are derived purely from the list linkage. */
		ext4_io_end_t *prev_io =
			container_of(entry->list.prev, ext4_io_end_t, list);
		ext4_io_end_t *next_io =
			container_of(entry->list.next, ext4_io_end_t, list);

		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
			   entry, inode->i_ino, prev_io, next_io);
	}
	spin_unlock_irqrestore(&ei->i_completed_io_lock, irq_flags);
#endif
}
/*
 * Check a range of space and convert unwritten extents to written.
 *
 * @io: completed io_end descriptor describing the inode, offset and size.
 *
 * Nothing is done (and 0 is returned) when @io is not linked on any list or
 * its flag is not EXT4_IO_UNWRITTEN.  Otherwise the range is handed to
 * ext4_convert_unwritten_extents(); on failure the negative error is
 * returned and @io is left untouched so that it stays on the inode's list.
 * On success a pending iocb (if any) is completed, the flag is cleared so
 * the conversion is not repeated, and the conversion result is returned.
 */
static int ext4_end_io_nolock(ext4_io_end_t *io)
{
	struct inode *inode = io->inode;
	loff_t offset = io->offset;
	ssize_t size = io->size;
	int ret = 0;

	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
		   "list->prev 0x%p\n",
		   io, inode->i_ino, io->list.next, io->list.prev);

	if (list_empty(&io->list))
		return ret;

	if (io->flag != EXT4_IO_UNWRITTEN)
		return ret;

	ret = ext4_convert_unwritten_extents(inode, offset, size);
	if (ret < 0) {
		/*
		 * The adjacent string literals are concatenated; the trailing
		 * space after "unwritten" keeps the words from running
		 * together in the log.
		 */
		printk(KERN_EMERG "%s: failed to convert unwritten "
			"extents to written extents, error is %d"
			" io is still on inode %lu aio dio list\n",
		       __func__, ret, inode->i_ino);
		return ret;
	}

	if (io->iocb)
		aio_complete(io->iocb, io->result, 0);
	/* clear the DIO AIO unwritten flag */
	io->flag = 0;
	return ret;
}
/*
 * Workqueue handler for a completed aio dio IO: convert its unwritten
 * extents to written ones under i_mutex.  When the conversion succeeds the
 * io_end is unlinked from the inode's completed-IO list and freed; when it
 * fails the io_end is left alone.
 */
static void ext4_end_io_work(struct work_struct *work)
{
	ext4_io_end_t *io_end = container_of(work, ext4_io_end_t, work);
	struct inode *inode = io_end->inode;
	struct ext4_inode_info *ei = EXT4_I(inode);
	unsigned long irq_flags;
	int converted;

	mutex_lock(&inode->i_mutex);

	converted = (ext4_end_io_nolock(io_end) >= 0);
	if (converted) {
		/* Unlink only if something else has not already done so. */
		spin_lock_irqsave(&ei->i_completed_io_lock, irq_flags);
		if (!list_empty(&io_end->list))
			list_del_init(&io_end->list);
		spin_unlock_irqrestore(&ei->i_completed_io_lock, irq_flags);
	}

	mutex_unlock(&inode->i_mutex);

	if (converted)
		ext4_free_io_end(io_end);
}
/*
 * This function is called from ext4_sync_file().
 *
 * When IO is completed, the work to convert unwritten extents to
 * written is queued on a workqueue but may not get scheduled
 * immediately. When fsync is called, we need to ensure the
 * conversion is complete before fsync returns.
 * The inode keeps track of a list of pending/completed IO that
 * might need the conversion. This function walks through
 * the list and converts the related unwritten extents for completed IO
 * to written.
 *
 * Returns 0 if the list was empty or every entry was converted, otherwise
 * the most recent negative error returned by ext4_end_io_nolock().
 */
int flush_completed_IO(struct inode *inode)
{
ext4_io_end_t *io;
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned long flags;
int ret = 0;
int ret2 = 0;
/* Unlocked fast path: nothing queued, nothing to do. */
if (list_empty(&ei->i_completed_io_list))
return ret;
dump_completed_IO(inode);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
while (!list_empty(&ei->i_completed_io_list)){
/* Always work on the current head of the list. */
io = list_entry(ei->i_completed_io_list.next,
ext4_io_end_t, list);
/*
 * Call ext4_end_io_nolock() to convert completed
 * IO to written.
 *
 * When ext4_sync_file() is called, run_queue() may already
 * be about to flush the work corresponding to this io structure.
 * It will be upset if it finds that the io structure related
 * to the work to be scheduled has been freed.
 *
 * Thus we need to keep the io structure valid here after the
 * conversion has finished. The io structure has a flag to
 * avoid double conversion from both fsync and the background
 * workqueue work.
 */
/* The list lock is dropped around the conversion call. */
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
ret = ext4_end_io_nolock(io);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
/*
 * NOTE(review): on failure the entry is left at the head of the list,
 * so the next iteration appears to pick the same io again; confirm
 * whether a persistent conversion error can make this loop spin.
 */
if (ret < 0)
ret2 = ret;
else
list_del_init(&io->list);
}
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
return (ret2 < 0) ? ret2 : 0;
}
/*
 * Allocate and initialise an io_end descriptor for @inode using the
 * allocation flags in @flags.  Calls igrab() on @inode and sets up the
 * work item so that it runs ext4_end_io_work().
 *
 * Returns the new descriptor, or NULL if the allocation failed.
 */
static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
{
	ext4_io_end_t *io_end = kmalloc(sizeof(*io_end), flags);

	if (!io_end)
		return NULL;

	igrab(inode);
	io_end->inode = inode;

	/* kmalloc() does not zero memory, so every field is set explicitly. */
	io_end->flag = 0;
	io_end->offset = 0;
	io_end->size = 0;
	io_end->page = NULL;
	io_end->iocb = NULL;
	io_end->result = 0;

	INIT_WORK(&io_end->work, ext4_end_io_work);
	INIT_LIST_HEAD(&io_end->list);

	return io_end;
}
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private, int ret,
bool is_async)
@ -3827,7 +3644,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
size);
/* if not aio dio with unwritten extents, just free io and return */
if (io_end->flag != EXT4_IO_UNWRITTEN){
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
ext4_free_io_end(io_end);
iocb->private = NULL;
out:
@ -3844,14 +3661,14 @@ out:
}
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
/* queue the work to convert unwritten extents to written */
queue_work(wq, &io_end->work);
/* Add the io_end to per-inode completed aio dio list*/
ei = EXT4_I(io_end->inode);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
/* queue the work to convert unwritten extents to written */
queue_work(wq, &io_end->work);
iocb->private = NULL;
}
@ -3872,7 +3689,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
goto out;
}
io_end->flag = EXT4_IO_UNWRITTEN;
io_end->flag = EXT4_IO_END_UNWRITTEN;
inode = io_end->inode;
/* Add the io_end to per-inode completed io list*/
@ -5463,6 +5280,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
int error, rc = 0;
int orphan = 0;
const unsigned int ia_valid = attr->ia_valid;
error = inode_change_ok(inode, attr);
@ -5518,8 +5336,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
error = PTR_ERR(handle);
goto err_out;
}
error = ext4_orphan_add(handle, inode);
if (ext4_handle_valid(handle)) {
error = ext4_orphan_add(handle, inode);
orphan = 1;
}
EXT4_I(inode)->i_disksize = attr->ia_size;
rc = ext4_mark_inode_dirty(handle, inode);
if (!error)
@ -5537,6 +5357,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
goto err_out;
}
ext4_orphan_del(handle, inode);
orphan = 0;
ext4_journal_stop(handle);
goto err_out;
}
@ -5559,7 +5380,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
* If the call to ext4_truncate failed to get a transaction handle at
* all, we need to clean up the in-core orphan list manually.
*/
if (inode->i_nlink)
if (orphan && inode->i_nlink)
ext4_orphan_del(NULL, inode);
if (!rc && (ia_valid & ATTR_MODE))
@ -5642,7 +5463,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
*
* Also account for superblock, inode, quota and xattr blocks
*/
int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
int gdpblocks;

View file

@ -338,6 +338,14 @@
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_ext_cachep;
/*
 * We create slab caches for groupinfo data structures based on the
 * superblock block size.  A cache is created the first time a filesystem
 * with a given s_blocksize_bits is initialised and is then reused by any
 * other filesystem with the same block size, so there is at most one cache
 * per unique s_blocksize_bits.
 */
#define NR_GRPINFO_CACHES \
(EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
/*
 * Indexed by (s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE); a NULL slot
 * means no cache has been created for that block size yet.
 */
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@ -938,6 +946,85 @@ out:
return err;
}
/*
 * Lock the group_info alloc_sem of all the groups belonging to the same
 * buddy cache page as @group.  This makes sure no other parallel operation
 * on the buddy cache happens while the buddy cache lock is held.
 *
 * Returns the number of groups whose alloc_sem was taken; the matching
 * ext4_mb_put_buddy_cache_lock() takes that count as its locked_group
 * argument.
 */
static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
					ext4_group_t group)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	int blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
	/*
	 * The buddy cache inode stores the block bitmap and the buddy
	 * information in consecutive blocks, so each group needs two blocks.
	 */
	int block = group * 2;
	int pnum = block / blocks_per_page;
	ext4_group_t first_group = pnum * blocks_per_page / 2;
	int groups_per_page = blocks_per_page >> 1;
	int locked = 0;

	/* A page holding a single block still belongs to one group. */
	if (groups_per_page == 0)
		groups_per_page = 1;

	/*
	 * Take the write side of the allocation semaphore of every group the
	 * page covers (stopping at the last existing group), so that no block
	 * allocation is going on in any of those groups.
	 */
	while (locked < groups_per_page && first_group + locked < ngroups) {
		struct ext4_group_info *grp =
			ext4_get_group_info(sb, first_group + locked);

		/* The loop position is used as the nesting subclass. */
		down_write_nested(&grp->alloc_sem, locked);
		locked++;
	}

	return locked;
}
/*
 * Release the alloc_sem write locks of the first @locked_group groups that
 * share a buddy cache page with @group.  The first group of the page is
 * recomputed here the same way the locking side computes it.
 */
static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
					 ext4_group_t group, int locked_group)
{
	int blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
	/*
	 * The buddy cache inode stores the block bitmap and the buddy
	 * information in consecutive blocks, so each group needs two blocks.
	 */
	int block = group * 2;
	int pnum = block / blocks_per_page;
	ext4_group_t first_group = pnum * blocks_per_page / 2;
	int idx;

	/* Drop the write allocation semaphore of each locked group in turn. */
	for (idx = 0; idx < locked_group; idx++)
		up_write(&ext4_get_group_info(sb, first_group + idx)->alloc_sem);
}
/*
* Locking note: This routine calls ext4_mb_init_cache(), which takes the
* block group lock of all groups for this page; do not hold the BG lock when
@ -1915,84 +2002,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
return 0;
}
/*
 * Take the alloc_sem write lock of every group that shares a buddy cache
 * page with @group, so that no parallel operation touches that part of the
 * buddy cache while the lock is held.
 *
 * Returns how many groups were locked.
 */
int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t first_group;
	int blocks_per_page, groups_per_page;
	int block, pnum;
	int i;

	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;

	/*
	 * Each group occupies two consecutive blocks in the buddy cache
	 * inode: its block bitmap followed by its buddy information.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	first_group = pnum * blocks_per_page / 2;

	groups_per_page = blocks_per_page >> 1;
	if (!groups_per_page)
		groups_per_page = 1;

	for (i = 0; i < groups_per_page; i++) {
		ext4_group_t cur = first_group + i;

		/* Do not run past the last group of the filesystem. */
		if (cur >= ngroups)
			break;

		/*
		 * Holding the write side keeps block allocation out of this
		 * group; the loop index serves as the nesting subclass.
		 */
		down_write_nested(&ext4_get_group_info(sb, cur)->alloc_sem, i);
	}

	return i;
}
/*
 * Undo the buddy cache locking: release the alloc_sem write lock of the
 * first @locked_group groups on the buddy cache page that contains @group.
 */
void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
				  ext4_group_t group, int locked_group)
{
	ext4_group_t first_group;
	int blocks_per_page;
	int block, pnum;
	int released;

	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;

	/*
	 * Each group occupies two consecutive blocks in the buddy cache
	 * inode: its block bitmap followed by its buddy information.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	first_group = pnum * blocks_per_page / 2;

	/* Release the groups in ascending order, starting at the first. */
	released = 0;
	while (released < locked_group) {
		struct ext4_group_info *grp =
			ext4_get_group_info(sb, first_group + released);

		up_write(&grp->alloc_sem);
		released++;
	}
}
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
@ -2233,15 +2242,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = {
.release = seq_release,
};
/*
 * Look up the groupinfo slab cache for the given block size (expressed as
 * log2 bits).  The cache must already exist; a missing cache is a bug.
 */
static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
{
	struct kmem_cache *cachep;

	cachep = ext4_groupinfo_caches[blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE];
	BUG_ON(!cachep);

	return cachep;
}
/* Create and initialize ext4_group_info data for the given group. */
int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *desc)
{
int i, len;
int i;
int metalen = 0;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_info **meta_group_info;
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
/*
* First check if this group is the first of a reserved block.
@ -2261,22 +2279,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
meta_group_info;
}
/*
* calculate needed size. if change bb_counters size,
* don't forget about ext4_mb_generate_buddy()
*/
len = offsetof(typeof(**meta_group_info),
bb_counters[sb->s_blocksize_bits + 2]);
meta_group_info =
sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
meta_group_info[i] = kzalloc(len, GFP_KERNEL);
meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
if (meta_group_info[i] == NULL) {
printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
goto exit_group_info;
}
memset(meta_group_info[i], 0, kmem_cache_size(cachep));
set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
&(meta_group_info[i]->bb_state));
@ -2331,6 +2343,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
int num_meta_group_infos_max;
int array_size;
struct ext4_group_desc *desc;
struct kmem_cache *cachep;
/* This is the number of blocks used by GDT */
num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
@ -2389,8 +2402,9 @@ static int ext4_mb_init_backend(struct super_block *sb)
return 0;
err_freebuddy:
cachep = get_groupinfo_cache(sb->s_blocksize_bits);
while (i-- > 0)
kfree(ext4_get_group_info(sb, i));
kmem_cache_free(cachep, ext4_get_group_info(sb, i));
i = num_meta_group_infos;
while (i-- > 0)
kfree(sbi->s_group_info[i]);
@ -2407,19 +2421,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
unsigned offset;
unsigned max;
int ret;
int cache_index;
struct kmem_cache *cachep;
char *namep = NULL;
i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_offsets == NULL) {
return -ENOMEM;
ret = -ENOMEM;
goto out;
}
i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_maxs == NULL) {
kfree(sbi->s_mb_offsets);
return -ENOMEM;
ret = -ENOMEM;
goto out;
}
cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
cachep = ext4_groupinfo_caches[cache_index];
if (!cachep) {
char name[32];
int len = offsetof(struct ext4_group_info,
bb_counters[sb->s_blocksize_bits + 2]);
sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
namep = kstrdup(name, GFP_KERNEL);
if (!namep) {
ret = -ENOMEM;
goto out;
}
/* Need to free the kmem_cache_name() when we
* destroy the slab */
cachep = kmem_cache_create(namep, len, 0,
SLAB_RECLAIM_ACCOUNT, NULL);
if (!cachep) {
ret = -ENOMEM;
goto out;
}
ext4_groupinfo_caches[cache_index] = cachep;
}
/* order 0 is regular bitmap */
@ -2440,9 +2483,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
/* init file for buddy data */
ret = ext4_mb_init_backend(sb);
if (ret != 0) {
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
return ret;
goto out;
}
spin_lock_init(&sbi->s_md_lock);
@ -2457,9 +2498,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
if (sbi->s_locality_groups == NULL) {
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
return -ENOMEM;
ret = -ENOMEM;
goto out;
}
for_each_possible_cpu(i) {
struct ext4_locality_group *lg;
@ -2476,7 +2516,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
if (sbi->s_journal)
sbi->s_journal->j_commit_callback = release_blocks_on_commit;
return 0;
out:
if (ret) {
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
kfree(namep);
}
return ret;
}
/* needs to be called with the ext4 group lock held */
@ -2504,6 +2550,7 @@ int ext4_mb_release(struct super_block *sb)
int num_meta_group_infos;
struct ext4_group_info *grinfo;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
if (sbi->s_group_info) {
for (i = 0; i < ngroups; i++) {
@ -2514,7 +2561,7 @@ int ext4_mb_release(struct super_block *sb)
ext4_lock_group(sb, i);
ext4_mb_cleanup_pa(grinfo);
ext4_unlock_group(sb, i);
kfree(grinfo);
kmem_cache_free(cachep, grinfo);
}
num_meta_group_infos = (ngroups +
EXT4_DESC_PER_BLOCK(sb) - 1) >>
@ -2558,7 +2605,7 @@ int ext4_mb_release(struct super_block *sb)
return 0;
}
static inline void ext4_issue_discard(struct super_block *sb,
static inline int ext4_issue_discard(struct super_block *sb,
ext4_group_t block_group, ext4_grpblk_t block, int count)
{
int ret;
@ -2568,10 +2615,11 @@ static inline void ext4_issue_discard(struct super_block *sb,
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
if (ret == EOPNOTSUPP) {
if (ret == -EOPNOTSUPP) {
ext4_warning(sb, "discard not supported, disabling");
clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
}
return ret;
}
/*
@ -2659,28 +2707,22 @@ static void ext4_remove_debugfs_entry(void)
#endif
int __init init_ext4_mballoc(void)
int __init ext4_init_mballoc(void)
{
ext4_pspace_cachep =
kmem_cache_create("ext4_prealloc_space",
sizeof(struct ext4_prealloc_space),
0, SLAB_RECLAIM_ACCOUNT, NULL);
ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
SLAB_RECLAIM_ACCOUNT);
if (ext4_pspace_cachep == NULL)
return -ENOMEM;
ext4_ac_cachep =
kmem_cache_create("ext4_alloc_context",
sizeof(struct ext4_allocation_context),
0, SLAB_RECLAIM_ACCOUNT, NULL);
ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
SLAB_RECLAIM_ACCOUNT);
if (ext4_ac_cachep == NULL) {
kmem_cache_destroy(ext4_pspace_cachep);
return -ENOMEM;
}
ext4_free_ext_cachep =
kmem_cache_create("ext4_free_block_extents",
sizeof(struct ext4_free_data),
0, SLAB_RECLAIM_ACCOUNT, NULL);
ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
SLAB_RECLAIM_ACCOUNT);
if (ext4_free_ext_cachep == NULL) {
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
@ -2690,8 +2732,9 @@ int __init init_ext4_mballoc(void)
return 0;
}
void exit_ext4_mballoc(void)
void ext4_exit_mballoc(void)
{
int i;
/*
* Wait for completion of call_rcu()'s on ext4_pspace_cachep
* before destroying the slab cache.
@ -2700,6 +2743,15 @@ void exit_ext4_mballoc(void)
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
kmem_cache_destroy(ext4_free_ext_cachep);
for (i = 0; i < NR_GRPINFO_CACHES; i++) {
struct kmem_cache *cachep = ext4_groupinfo_caches[i];
if (cachep) {
char *name = (char *)kmem_cache_name(cachep);
kmem_cache_destroy(cachep);
kfree(name);
}
}
ext4_remove_debugfs_entry();
}
@ -3536,8 +3588,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
*/
static noinline_for_stack int
ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
struct ext4_prealloc_space *pa,
struct ext4_allocation_context *ac)
struct ext4_prealloc_space *pa)
{
struct super_block *sb = e4b->bd_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@ -3555,11 +3606,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
end = bit + pa->pa_len;
if (ac) {
ac->ac_sb = sb;
ac->ac_inode = pa->pa_inode;
}
while (bit < end) {
bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
if (bit >= end)
@ -3570,16 +3616,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
(unsigned) next - bit, (unsigned) group);
free += next - bit;
if (ac) {
ac->ac_b_ex.fe_group = group;
ac->ac_b_ex.fe_start = bit;
ac->ac_b_ex.fe_len = next - bit;
ac->ac_b_ex.fe_logical = 0;
trace_ext4_mballoc_discard(ac);
}
trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
next - bit);
trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
grp_blk_start + bit, next - bit);
mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
bit = next + 1;
}
@ -3602,29 +3641,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
static noinline_for_stack int
ext4_mb_release_group_pa(struct ext4_buddy *e4b,
struct ext4_prealloc_space *pa,
struct ext4_allocation_context *ac)
struct ext4_prealloc_space *pa)
{
struct super_block *sb = e4b->bd_sb;
ext4_group_t group;
ext4_grpblk_t bit;
trace_ext4_mb_release_group_pa(sb, ac, pa);
trace_ext4_mb_release_group_pa(sb, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
if (ac) {
ac->ac_sb = sb;
ac->ac_inode = NULL;
ac->ac_b_ex.fe_group = group;
ac->ac_b_ex.fe_start = bit;
ac->ac_b_ex.fe_len = pa->pa_len;
ac->ac_b_ex.fe_logical = 0;
trace_ext4_mballoc_discard(ac);
}
trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
return 0;
}
@ -3645,7 +3674,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
struct ext4_allocation_context *ac;
struct list_head list;
struct ext4_buddy e4b;
int err;
@ -3674,9 +3702,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
INIT_LIST_HEAD(&list);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac)
ac->ac_sb = sb;
repeat:
ext4_lock_group(sb, group);
list_for_each_entry_safe(pa, tmp,
@ -3731,9 +3756,9 @@ repeat:
spin_unlock(pa->pa_obj_lock);
if (pa->pa_type == MB_GROUP_PA)
ext4_mb_release_group_pa(&e4b, pa, ac);
ext4_mb_release_group_pa(&e4b, pa);
else
ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@ -3741,8 +3766,6 @@ repeat:
out:
ext4_unlock_group(sb, group);
if (ac)
kmem_cache_free(ext4_ac_cachep, ac);
ext4_mb_unload_buddy(&e4b);
put_bh(bitmap_bh);
return free;
@ -3763,7 +3786,6 @@ void ext4_discard_preallocations(struct inode *inode)
struct super_block *sb = inode->i_sb;
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
struct ext4_allocation_context *ac;
ext4_group_t group = 0;
struct list_head list;
struct ext4_buddy e4b;
@ -3779,11 +3801,6 @@ void ext4_discard_preallocations(struct inode *inode)
INIT_LIST_HEAD(&list);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac) {
ac->ac_sb = sb;
ac->ac_inode = inode;
}
repeat:
/* first, collect all pa's in the inode */
spin_lock(&ei->i_prealloc_lock);
@ -3853,7 +3870,7 @@ repeat:
ext4_lock_group(sb, group);
list_del(&pa->pa_group_list);
ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
@ -3862,8 +3879,6 @@ repeat:
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
if (ac)
kmem_cache_free(ext4_ac_cachep, ac);
}
/*
@ -4061,14 +4076,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
struct ext4_buddy e4b;
struct list_head discard_list;
struct ext4_prealloc_space *pa, *tmp;
struct ext4_allocation_context *ac;
mb_debug(1, "discard locality group preallocation\n");
INIT_LIST_HEAD(&discard_list);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac)
ac->ac_sb = sb;
spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@ -4120,15 +4131,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
}
ext4_lock_group(sb, group);
list_del(&pa->pa_group_list);
ext4_mb_release_group_pa(&e4b, pa, ac);
ext4_mb_release_group_pa(&e4b, pa);
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
if (ac)
kmem_cache_free(ext4_ac_cachep, ac);
}
/*
@ -4492,7 +4501,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
{
struct buffer_head *bitmap_bh = NULL;
struct super_block *sb = inode->i_sb;
struct ext4_allocation_context *ac = NULL;
struct ext4_group_desc *gdp;
unsigned long freed = 0;
unsigned int overflow;
@ -4532,6 +4540,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
if (!bh)
tbh = sb_find_get_block(inode->i_sb,
block + i);
if (unlikely(!tbh))
continue;
ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
inode, tbh, block + i);
}
@ -4547,12 +4557,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
if (!ext4_should_writeback_data(inode))
flags |= EXT4_FREE_BLOCKS_METADATA;
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac) {
ac->ac_inode = inode;
ac->ac_sb = sb;
}
do_more:
overflow = 0;
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@ -4610,12 +4614,7 @@ do_more:
BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
}
#endif
if (ac) {
ac->ac_b_ex.fe_group = block_group;
ac->ac_b_ex.fe_start = bit;
ac->ac_b_ex.fe_len = count;
trace_ext4_mballoc_free(ac);
}
trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
err = ext4_mb_load_buddy(sb, block_group, &e4b);
if (err)
@ -4641,12 +4640,12 @@ do_more:
* with group lock held. generate_buddy look at
* them with group lock_held
*/
if (test_opt(sb, DISCARD))
ext4_issue_discard(sb, block_group, bit, count);
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count);
mb_free_blocks(inode, &e4b, bit, count);
ext4_mb_return_to_preallocation(inode, &e4b, block, count);
if (test_opt(sb, DISCARD))
ext4_issue_discard(sb, block_group, bit, count);
}
ret = ext4_free_blks_count(sb, gdp) + count;
@ -4686,7 +4685,190 @@ error_return:
dquot_free_block(inode, freed);
brelse(bitmap_bh);
ext4_std_error(sb, err);
if (ac)
kmem_cache_free(ext4_ac_cachep, ac);
return;
}
/**
 * ext4_trim_extent -- discard one free extent of an allocation group
 * @sb:		super block for the file system
 * @start:	first block of the free extent, relative to the group
 * @count:	number of blocks to discard
 * @group:	allocation group the extent belongs to
 * @e4b:	ext4 buddy for the group
 *
 * Discards "count" blocks beginning at "start" in "group".  The range is
 * marked as used in the buddy while the discard is in flight so that no one
 * can allocate it, and is handed back to the buddy afterwards whether or
 * not the discard succeeded.
 *
 * Must be called with the group lock held.  The lock is dropped around the
 * discard itself and re-taken before returning.
 *
 * Returns the value returned by ext4_issue_discard().
 */
static int ext4_trim_extent(struct super_block *sb, int start, int count,
		ext4_group_t group, struct ext4_buddy *e4b)
{
	struct ext4_free_extent extent;
	int err;

	assert_spin_locked(ext4_group_lock_ptr(sb, group));

	extent.fe_group = group;
	extent.fe_start = start;
	extent.fe_len = count;

	/* Hide the range from the allocator while it is being discarded. */
	mb_mark_used(e4b, &extent);

	ext4_unlock_group(sb, group);
	err = ext4_issue_discard(sb, group, start, count);
	if (err != 0)
		ext4_std_error(sb, err);
	ext4_lock_group(sb, group);

	/* The discard is over: give the range back to the buddy. */
	mb_free_blocks(NULL, e4b, start, extent.fe_len);

	return err;
}
/**
 * ext4_trim_all_free -- trim all free space in an allocation group
 * @sb:		super block for the file system
 * @e4b:	ext4 buddy of the group (must not be NULL)
 * @start:	first group block to examine
 * @max:	upper bound (exclusive) of the group blocks to examine
 * @minblocks:	minimum length, in blocks, of an extent worth trimming
 *
 * Walks the bitmap found in @e4b looking for runs of free (zero) bits inside
 * [@start, @max).  Every run of at least @minblocks blocks is passed to
 * ext4_trim_extent(), which marks it used, issues the discard and frees it
 * again.  The scan stops early when a fatal signal is pending, when
 * ext4_trim_extent() fails, or when the free blocks not yet trimmed in this
 * group drop below @minblocks.
 *
 * The group lock is taken here and released before returning; it is also
 * dropped briefly whenever a reschedule is needed.
 *
 * Returns the number of blocks trimmed, -ERESTARTSYS if a fatal signal was
 * pending, or the negative error returned by ext4_trim_extent().
 */
ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
		ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
{
	ext4_grpblk_t run_end, trimmed = 0;
	ext4_group_t group;
	void *bitmap;
	int err = 0;

	BUG_ON(e4b == NULL);

	bitmap = e4b->bd_bitmap;
	group = e4b->bd_group;

	/* Nothing is free below bb_first_free, so start no earlier. */
	if (e4b->bd_info->bb_first_free > start)
		start = e4b->bd_info->bb_first_free;

	ext4_lock_group(sb, group);

	while (start < max) {
		/* Find the next free run: [start, run_end). */
		start = mb_find_next_zero_bit(bitmap, max, start);
		if (start >= max)
			break;
		run_end = mb_find_next_bit(bitmap, max, start);

		if ((run_end - start) >= minblocks) {
			err = ext4_trim_extent(sb, start, run_end - start,
					       group, e4b);
			if (err < 0)
				break;
			trimmed += run_end - start;
		}
		/* The bit at run_end is known to be set: step past it. */
		start = run_end + 1;

		if (fatal_signal_pending(current)) {
			trimmed = -ERESTARTSYS;
			break;
		}

		if (need_resched()) {
			ext4_unlock_group(sb, group);
			cond_resched();
			ext4_lock_group(sb, group);
		}

		/* Not enough free space left for another qualifying run. */
		if ((e4b->bd_info->bb_free - trimmed) < minblocks)
			break;
	}

	ext4_unlock_group(sb, group);

	ext4_debug("trimmed %d blocks in the group %d\n",
		trimmed, group);

	return (err < 0) ? err : trimmed;
}
/**
 * ext4_trim_fs() -- trim ioctl handle function
 * @sb:		superblock for filesystem
 * @range:	fstrim_range structure
 *
 * The fields of @range are interpreted as follows:
 *   start:	first byte to trim
 *   len:	number of bytes to trim from start
 *   minlen:	minimum extent length in bytes
 *
 * ext4_trim_fs goes through all allocation groups containing bytes from
 * start to start+len.  For each such group with at least minlen worth of
 * free blocks, ext4_trim_all_free is invoked to trim the free space.
 *
 * On return @range->len holds the number of bytes actually trimmed.
 *
 * Returns 0 on success, -EINVAL if minlen exceeds one group or the range
 * selects no group, or the error from loading a buddy / trimming a group.
 */
int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
	struct ext4_buddy e4b;
	ext4_group_t first_group, last_group;
	ext4_group_t group, ngroups = ext4_get_groups_count(sb);
	ext4_grpblk_t cnt, first_block, last_block;
	uint64_t start, len, minlen, trimmed;
	int ret = 0;

	/* Convert the byte-based request into file system blocks. */
	start = range->start >> sb->s_blocksize_bits;
	len = range->len >> sb->s_blocksize_bits;
	minlen = range->minlen >> sb->s_blocksize_bits;
	trimmed = 0;

	if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
		return -EINVAL;

	/* Determine first and last group to examine based on start and len */
	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
				     &first_group, &first_block);
	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
				     &last_group, &last_block);
	last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
	last_block = EXT4_BLOCKS_PER_GROUP(sb);

	if (first_group > last_group)
		return -EINVAL;

	for (group = first_group; group <= last_group; group++) {
		ret = ext4_mb_load_buddy(sb, group, &e4b);
		if (ret) {
			ext4_error(sb, "Error in loading buddy "
					"information for %u", group);
			break;
		}

		/*
		 * Every group but the one holding the end of the range is
		 * scanned up to EXT4_BLOCKS_PER_GROUP(sb).  Only when the
		 * remaining length ends inside this group does the upper
		 * bound shrink, and it must be measured from first_block,
		 * not from the start of the group.
		 */
		if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb))
			last_block = first_block + len;
		len -= last_block - first_block;

		if (e4b.bd_info->bb_free >= minlen) {
			cnt = ext4_trim_all_free(sb, &e4b, first_block,
						last_block, minlen);
			if (cnt < 0) {
				ret = cnt;
				ext4_mb_unload_buddy(&e4b);
				break;
			}
			/*
			 * Account only for groups that were really trimmed;
			 * a skipped group must not re-add an earlier count.
			 */
			trimmed += cnt;
		}
		ext4_mb_unload_buddy(&e4b);
		first_block = 0;
	}
	range->len = trimmed * sb->s_blocksize;

	return ret;
}

View file

@ -412,7 +412,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
struct buffer_head *bh;
struct ext4_extent_header *eh;
block = idx_pblock(ix);
block = ext4_idx_pblock(ix);
bh = sb_bread(inode->i_sb, block);
if (!bh)
return -EIO;

View file

@ -85,7 +85,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
/* leaf block */
*extent = ++path[ppos].p_ext;
path[ppos].p_block = ext_pblock(path[ppos].p_ext);
path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
return 0;
}
@ -96,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
/* index block */
path[ppos].p_idx++;
path[ppos].p_block = idx_pblock(path[ppos].p_idx);
path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
if (path[ppos+1].p_bh)
brelse(path[ppos+1].p_bh);
path[ppos+1].p_bh =
@ -111,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
path[cur_ppos].p_idx =
EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
path[cur_ppos].p_block =
idx_pblock(path[cur_ppos].p_idx);
ext4_idx_pblock(path[cur_ppos].p_idx);
if (path[cur_ppos+1].p_bh)
brelse(path[cur_ppos+1].p_bh);
path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
@ -133,7 +133,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
path[leaf_ppos].p_ext = *extent =
EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
path[leaf_ppos].p_block =
ext_pblock(path[leaf_ppos].p_ext);
ext4_ext_pblock(path[leaf_ppos].p_ext);
return 0;
}
}
@ -249,7 +249,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
*/
o_end->ee_block = end_ext->ee_block;
o_end->ee_len = end_ext->ee_len;
ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
}
o_start->ee_len = start_ext->ee_len;
@ -276,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
*/
o_end->ee_block = end_ext->ee_block;
o_end->ee_len = end_ext->ee_len;
ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
/*
* Set 0 to the extent block if new_ext was
@ -361,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start,
/* Insert new entry */
if (new_ext->ee_len) {
o_start[i] = *new_ext;
ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
}
/* Insert end entry */
@ -488,7 +488,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
start_ext.ee_len = end_ext.ee_len = 0;
new_ext.ee_block = cpu_to_le32(*from);
ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
new_ext.ee_len = dext->ee_len;
new_ext_alen = ext4_ext_get_actual_len(&new_ext);
new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
@ -553,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
copy_extent_status(oext, &end_ext);
end_ext_alen = ext4_ext_get_actual_len(&end_ext);
ext4_ext_store_pblock(&end_ext,
(ext_pblock(o_end) + oext_alen - end_ext_alen));
(ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
end_ext.ee_block =
cpu_to_le32(le32_to_cpu(o_end->ee_block) +
oext_alen - end_ext_alen);
@ -604,7 +604,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
/* When tmp_dext is too large, pick up the target range. */
diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff);
ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
tmp_dext->ee_block =
cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
@ -613,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
tmp_dext->ee_len = cpu_to_le16(max_count);
orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff);
ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
/* Adjust extent length if donor extent is larger than orig */
if (ext4_ext_get_actual_len(tmp_dext) >

View file

@ -856,6 +856,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
struct buffer_head *bh_use[NAMEI_RA_SIZE];
struct buffer_head *bh, *ret = NULL;
ext4_lblk_t start, block, b;
const u8 *name = d_name->name;
int ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
int ra_ptr = 0; /* Current index into readahead
@ -870,6 +871,16 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
namelen = d_name->len;
if (namelen > EXT4_NAME_LEN)
return NULL;
if ((namelen <= 2) && (name[0] == '.') &&
(name[1] == '.' || name[1] == '0')) {
/*
* "." or ".." will only be in the first block
* NFS may look up ".."; "." should be handled by the VFS
*/
block = start = 0;
nblocks = 1;
goto restart;
}
if (is_dx(dir)) {
bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
/*
@ -960,55 +971,35 @@ cleanup_and_exit:
static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir, int *err)
{
struct super_block * sb;
struct super_block * sb = dir->i_sb;
struct dx_hash_info hinfo;
u32 hash;
struct dx_frame frames[2], *frame;
struct ext4_dir_entry_2 *de, *top;
struct buffer_head *bh;
ext4_lblk_t block;
int retval;
int namelen = d_name->len;
const u8 *name = d_name->name;
sb = dir->i_sb;
/* NFS may look up ".." - look at dx_root directory block */
if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
return NULL;
} else {
frame = frames;
frame->bh = NULL; /* for dx_release() */
frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
dx_set_block(frame->at, 0); /* dx_root block is 0 */
}
hash = hinfo.hash;
if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
return NULL;
do {
block = dx_get_block(frame->at);
if (!(bh = ext4_bread (NULL,dir, block, 0, err)))
if (!(bh = ext4_bread(NULL, dir, block, 0, err)))
goto errout;
de = (struct ext4_dir_entry_2 *) bh->b_data;
top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
EXT4_DIR_REC_LEN(0));
for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
+ ((char *) de - bh->b_data);
if (!ext4_check_dir_entry(dir, de, bh, off)) {
brelse(bh);
*err = ERR_BAD_DX_DIR;
goto errout;
}
if (ext4_match(namelen, name, de)) {
*res_dir = de;
dx_release(frames);
return bh;
}
retval = search_dirblock(bh, dir, d_name,
block << EXT4_BLOCK_SIZE_BITS(sb),
res_dir);
if (retval == 1) { /* Success! */
dx_release(frames);
return bh;
}
brelse(bh);
if (retval == -1) {
*err = ERR_BAD_DX_DIR;
goto errout;
}
/* Check to see if we should continue to search */
retval = ext4_htree_next_block(dir, hash, frame,
retval = ext4_htree_next_block(dir, hinfo.hash, frame,
frames, NULL);
if (retval < 0) {
ext4_warning(sb,

430
fs/ext4/page-io.c Normal file
View file

@ -0,0 +1,430 @@
/*
* linux/fs/ext4/page-io.c
*
* This contains the new page_io functions for ext4
*
* Written by Theodore Ts'o, 2010.
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "ext4_extents.h"
static struct kmem_cache *io_page_cachep, *io_end_cachep;
/*
 * Create the two slab caches used by the page-io code (one for
 * struct ext4_io_page, one for struct ext4_io_end).
 *
 * Returns 0 on success or -ENOMEM if either cache cannot be created; in
 * the failure case no cache is left allocated.
 */
int __init ext4_init_pageio(void)
{
	io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
	if (io_page_cachep == NULL)
		return -ENOMEM;

	io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
	if (io_end_cachep == NULL) {
		/* Undo the first creation so the failure leaks nothing. */
		kmem_cache_destroy(io_page_cachep);
		return -ENOMEM;
	}

	return 0;
}
/*
 * Tear down the slab caches created by ext4_init_pageio(), in the reverse
 * order of their creation.
 */
void ext4_exit_pageio(void)
{
	/* io_end cache first ... */
	kmem_cache_destroy(io_end_cachep);

	/* ... then the io_page cache. */
	kmem_cache_destroy(io_page_cachep);
}
/*
 * Release an io_end and everything it still references.
 *
 * Drops the reference on io->page (if set), drops this io_end's count on
 * each attached ext4_io_page -- ending writeback on, releasing and freeing
 * those whose count reaches zero -- then puts the inode and frees the
 * io_end itself.  @io must not be NULL.
 */
void ext4_free_io_end(ext4_io_end_t *io)
{
	int idx;

	BUG_ON(!io);

	if (io->page)
		put_page(io->page);

	for (idx = 0; idx < io->num_io_pages; idx++) {
		struct ext4_io_page *io_page = io->pages[idx];

		/* Still counted by someone else: leave it alone. */
		if (--io_page->p_count != 0)
			continue;

		end_page_writeback(io_page->p_page);
		put_page(io_page->p_page);
		kmem_cache_free(io_page_cachep, io_page);
	}
	io->num_io_pages = 0;

	iput(io->inode);
	kmem_cache_free(io_end_cachep, io);
}
/*
 * Convert the unwritten extents covered by a completed io_end to written.
 *
 * Nothing is done, and 0 is returned, when the io_end is not queued on a
 * list or is not flagged EXT4_IO_END_UNWRITTEN.  If the conversion fails
 * the error is logged and returned with the flag still set.  On success
 * the attached iocb (if any) is completed and the flag is cleared.
 */
int ext4_end_io_nolock(ext4_io_end_t *io)
{
	struct inode *inode = io->inode;
	loff_t offset = io->offset;
	ssize_t size = io->size;
	int err;

	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
		   "list->prev 0x%p\n",
		   io, inode->i_ino, io->list.next, io->list.prev);

	/* Only queued io_ends that cover unwritten extents need work. */
	if (list_empty(&io->list) || !(io->flag & EXT4_IO_END_UNWRITTEN))
		return 0;

	err = ext4_convert_unwritten_extents(inode, offset, size);
	if (err < 0) {
		printk(KERN_EMERG "%s: failed to convert unwritten "
			"extents to written extents, error is %d "
			"io is still on inode %lu aio dio list\n",
		       __func__, err, inode->i_ino);
		return err;
	}

	if (io->iocb)
		aio_complete(io->iocb, io->result, 0);

	/* clear the DIO AIO unwritten flag */
	io->flag &= ~EXT4_IO_END_UNWRITTEN;

	return err;
}
/*
 * Work item run for a completed io_end: convert its unwritten extents to
 * written extents under i_mutex, unlink it from the inode's completed-io
 * list and free it.  If the conversion fails the io_end is left where it
 * is and is not freed.
 */
static void ext4_end_io_work(struct work_struct *work)
{
	ext4_io_end_t *io_end = container_of(work, ext4_io_end_t, work);
	struct inode *inode = io_end->inode;
	struct ext4_inode_info *info = EXT4_I(inode);
	unsigned long irq_flags;

	mutex_lock(&inode->i_mutex);

	if (ext4_end_io_nolock(io_end) < 0) {
		mutex_unlock(&inode->i_mutex);
		return;
	}

	spin_lock_irqsave(&info->i_completed_io_lock, irq_flags);
	if (!list_empty(&io_end->list))
		list_del_init(&io_end->list);
	spin_unlock_irqrestore(&info->i_completed_io_lock, irq_flags);

	mutex_unlock(&inode->i_mutex);

	ext4_free_io_end(io_end);
}
/*
 * Allocate and initialise an io_end for @inode using the allocation
 * flags in @flags.
 *
 * The new io_end is zeroed, holds a reference on the inode obtained with
 * igrab(), and has its work item and list head initialised.  Returns NULL
 * if the allocation fails.
 */
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
{
	ext4_io_end_t *io_end = kmem_cache_alloc(io_end_cachep, flags);

	if (io_end == NULL)
		return NULL;

	memset(io_end, 0, sizeof(*io_end));
	io_end->inode = igrab(inode);
	BUG_ON(!io_end->inode);
	INIT_WORK(&io_end->work, ext4_end_io_work);
	INIT_LIST_HEAD(&io_end->list);

	return io_end;
}
/*
* Print an buffer I/O error compatible with the fs/buffer.c. This
* provides compatibility with dmesg scrapers that look for a specific
* buffer I/O error message. We really need a unified error reporting
* structure to userspace ala Digital Unix's uerf system, but it's
* probably not going to happen in my lifetime, due to LKML politics...
*/
static void buffer_io_error(struct buffer_head *bh)
{
char b[BDEVNAME_SIZE];
printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
bdevname(bh->b_bdev, b),
(unsigned long long)bh->b_blocknr);
}
/*
 * Completion handler installed on the bios built by io_submit_init().
 *
 * @bio:   the finished bio; bi_private carries the ext4_io_end_t
 * @error: completion status, ignored when the bio is flagged BIO_UPTODATE
 *
 * Releases the bio, updates the buffer and page state of every page
 * attached to the io_end, drops this io_end's count on each ext4_io_page,
 * then queues the io_end on the inode's completed-io list and schedules
 * the work that converts unwritten extents.  If the super block is no
 * longer active the io_end is simply freed.
 */
static void ext4_end_bio(struct bio *bio, int error)
{
	ext4_io_end_t *io_end = bio->bi_private;
	struct workqueue_struct *wq;
	struct inode *inode;
	unsigned long flags;
	ext4_fsblk_t err_block;
	int i;

	BUG_ON(!io_end);
	inode = io_end->inode;
	bio->bi_private = NULL;
	bio->bi_end_io = NULL;
	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
		error = 0;
	/* Remember the starting block for the warning before the bio goes. */
	err_block = bio->bi_sector >> (inode->i_blkbits - 9);
	bio_put(bio);

	if (!(inode->i_sb->s_flags & MS_ACTIVE)) {
		pr_err("sb umounted, discard end_io request for inode %lu\n",
			io_end->inode->i_ino);
		ext4_free_io_end(io_end);
		return;
	}

	if (error) {
		io_end->flag |= EXT4_IO_END_ERROR;
		ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
			     "(offset %llu size %ld starting block %llu)",
			     inode->i_ino,
			     (unsigned long long) io_end->offset,
			     (long) io_end->size,
			     (unsigned long long) err_block);
	}

	for (i = 0; i < io_end->num_io_pages; i++) {
		struct page *page = io_end->pages[i]->p_page;
		struct buffer_head *bh, *head;
		int partial_write = 0;

		head = page_buffers(page);
		if (error)
			SetPageError(page);
		BUG_ON(!head);
		if (head->b_size == PAGE_CACHE_SIZE)
			clear_buffer_dirty(head);
		else {
			loff_t offset;
			loff_t io_end_offset = io_end->offset + io_end->size;

			offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
			bh = head;
			do {
				/* Buffers fully inside this io_end are done. */
				if ((offset >= io_end->offset) &&
				    (offset+bh->b_size <= io_end_offset)) {
					if (error)
						buffer_io_error(bh);

					clear_buffer_dirty(bh);
				}
				if (buffer_delay(bh))
					partial_write = 1;
				else if (!buffer_mapped(bh))
					clear_buffer_dirty(bh);
				else if (buffer_dirty(bh))
					partial_write = 1;
				offset += bh->b_size;
				bh = bh->b_this_page;
			} while (bh != head);
		}

		/*
		 * If this is a partial write which happened to make
		 * all buffers uptodate then we can optimize away a
		 * bogus readpage() for the next read(). Here we
		 * 'discover' whether the page went uptodate as a
		 * result of this (potentially partial) write.
		 *
		 * This has to happen before the io_page count is dropped
		 * below: once put_page() has run, nothing in this function
		 * keeps the page alive any more.
		 */
		if (!partial_write)
			SetPageUptodate(page);

		if (--io_end->pages[i]->p_count == 0) {
			end_page_writeback(page);
			put_page(page);
			kmem_cache_free(io_page_cachep, io_end->pages[i]);
		}
	}
	io_end->num_io_pages = 0;

	/* Add the io_end to per-inode completed io list*/
	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
	list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);

	wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
	/* queue the work to convert unwritten extents to written */
	queue_work(wq, &io_end->work);
}
void ext4_io_submit(struct ext4_io_submit *io)
{
struct bio *bio = io->io_bio;
if (bio) {
bio_get(io->io_bio);
submit_bio(io->io_op, io->io_bio);
BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
bio_put(io->io_bio);
}
io->io_bio = 0;
io->io_op = 0;
io->io_end = 0;
}
static int io_submit_init(struct ext4_io_submit *io,
struct inode *inode,
struct writeback_control *wbc,
struct buffer_head *bh)
{
ext4_io_end_t *io_end;
struct page *page = bh->b_page;
int nvecs = bio_get_nr_vecs(bh->b_bdev);
struct bio *bio;
io_end = ext4_init_io_end(inode, GFP_NOFS);
if (!io_end)
return -ENOMEM;
do {
bio = bio_alloc(GFP_NOIO, nvecs);
nvecs >>= 1;
} while (bio == NULL);
bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
bio->bi_private = io->io_end = io_end;
bio->bi_end_io = ext4_end_bio;
io_end->inode = inode;
io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
io->io_bio = bio;
io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
WRITE_SYNC_PLUG : WRITE);
io->io_next_block = bh->b_blocknr;
return 0;
}
/*
 * Add one buffer of a page to the bio being assembled in @io, submitting
 * the current bio and starting a new one whenever the buffer cannot be
 * appended to it.
 *
 * @io:      submission state (current bio, its io_end, next expected block)
 * @io_page: tracking structure of the page @bh belongs to
 * @inode:   inode being written
 * @wbc:     writeback control, passed on to io_submit_init()
 * @bh:      buffer to add
 *
 * Returns 0 on success (including when the buffer is skipped) or the
 * error returned by io_submit_init().
 */
static int io_submit_add_bh(struct ext4_io_submit *io,
struct ext4_io_page *io_page,
struct inode *inode,
struct writeback_control *wbc,
struct buffer_head *bh)
{
ext4_io_end_t *io_end;
int ret;
if (buffer_new(bh)) {
clear_buffer_new(bh);
unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
}
/*
 * Unmapped or delayed buffers are not written here.  An unmapped buffer
 * also loses its dirty bit.  Either way the buffer breaks the run, so any
 * bio in progress is submitted.
 */
if (!buffer_mapped(bh) || buffer_delay(bh)) {
if (!buffer_mapped(bh))
clear_buffer_dirty(bh);
if (io->io_bio)
ext4_io_submit(io);
return 0;
}
/*
 * A buffer that does not continue at the next expected block cannot join
 * the current bio.  The label inside this body is also the target of the
 * two retry gotos below.
 */
if (io->io_bio && bh->b_blocknr != io->io_next_block) {
submit_and_retry:
ext4_io_submit(io);
}
/* No bio in progress (possibly because it was just submitted): start one. */
if (io->io_bio == NULL) {
ret = io_submit_init(io, inode, wbc, bh);
if (ret)
return ret;
}
io_end = io->io_end;
/* The io_end's page array is full and this page is not its last entry. */
if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
(io_end->pages[io_end->num_io_pages-1] != io_page))
goto submit_and_retry;
if (buffer_uninit(bh))
io->io_end->flag |= EXT4_IO_END_UNWRITTEN;
/*
 * NOTE(review): size, io_next_block and the UNWRITTEN flag are updated
 * before bio_add_page() is known to have succeeded.  If the retry below
 * is taken, the io_end that gets submitted has already been charged for
 * this buffer -- confirm that this is intended.
 */
io->io_end->size += bh->b_size;
io->io_next_block++;
ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
/* Record the page in the io_end once, counting the io_end on the page. */
if ((io_end->num_io_pages == 0) ||
(io_end->pages[io_end->num_io_pages-1] != io_page)) {
io_end->pages[io_end->num_io_pages++] = io_page;
io_page->p_count++;
}
return 0;
}
/*
 * Queue the buffers of a locked page for writeback through the bio being
 * assembled in @io.
 *
 * @io:   submission state shared across the pages of one writeback pass
 * @page: locked page to write; must not already be under writeback
 * @len:  number of bytes of the page to write; buffers starting at or
 *        beyond @len are only marked clean and uptodate
 * @wbc:  writeback control
 *
 * The page is unlocked before returning.  Returns 0 on success or the
 * error from io_submit_add_bh() / -ENOMEM; on error the page is redirtied
 * so that it is written again later.
 */
int ext4_bio_write_page(struct ext4_io_submit *io,
			struct page *page,
			int len,
			struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end, blocksize;
	struct ext4_io_page *io_page;
	struct buffer_head *bh, *head;
	int ret = 0;

	blocksize = 1 << inode->i_blkbits;

	BUG_ON(PageWriteback(page));

	io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
	if (!io_page) {
		set_page_dirty(page);
		unlock_page(page);
		return -ENOMEM;
	}
	io_page->p_page = page;
	io_page->p_count = 0;
	get_page(page);

	/*
	 * Mark the page under writeback only once the allocation above
	 * can no longer fail; the -ENOMEM return has no way to clear the
	 * writeback bit again.
	 */
	set_page_writeback(page);
	ClearPageError(page);

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_start >= len) {
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}
		ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
		if (ret) {
			/*
			 * We only get here on ENOMEM.  Not much else
			 * we can do but mark the page as dirty, and
			 * better luck next time.
			 */
			set_page_dirty(page);
			break;
		}
	}
	unlock_page(page);
	/*
	 * If the page was truncated before we could do the writeback,
	 * or we had a memory allocation error while trying to write
	 * the first buffer head, we won't have submitted any pages for
	 * I/O.  In that case we need to make sure we've cleared the
	 * PageWriteback bit from the page to prevent the system from
	 * wedging later on.
	 */
	if (io_page->p_count == 0) {
		/* Same release order as ext4_free_io_end()/ext4_end_bio(). */
		end_page_writeback(page);
		put_page(page);
		kmem_cache_free(io_page_cachep, io_page);
	}
	return ret;
}

View file

@ -226,23 +226,13 @@ static int setup_new_group_blocks(struct super_block *sb,
}
/* Zero out all of the reserved backup group descriptor table blocks */
for (i = 0, bit = gdblocks + 1, block = start + bit;
i < reserved_gdb; i++, block++, bit++) {
struct buffer_head *gdb;
ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
block, sbi->s_itb_per_group);
err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
GFP_NOFS);
if (err)
goto exit_bh;
ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit);
if ((err = extend_or_restart_transaction(handle, 1, bh)))
goto exit_bh;
if (IS_ERR(gdb = bclean(handle, sb, block))) {
err = PTR_ERR(gdb);
goto exit_bh;
}
ext4_handle_dirty_metadata(handle, NULL, gdb);
ext4_set_bit(bit, bh->b_data);
brelse(gdb);
}
ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
input->block_bitmap - start);
ext4_set_bit(input->block_bitmap - start, bh->b_data);
@ -251,28 +241,18 @@ static int setup_new_group_blocks(struct super_block *sb,
ext4_set_bit(input->inode_bitmap - start, bh->b_data);
/* Zero out all of the inode table blocks */
for (i = 0, block = input->inode_table, bit = block - start;
i < sbi->s_itb_per_group; i++, bit++, block++) {
struct buffer_head *it;
ext4_debug("clear inode block %#04llx (+%d)\n", block, bit);
if ((err = extend_or_restart_transaction(handle, 1, bh)))
goto exit_bh;
if (IS_ERR(it = bclean(handle, sb, block))) {
err = PTR_ERR(it);
goto exit_bh;
}
ext4_handle_dirty_metadata(handle, NULL, it);
brelse(it);
ext4_set_bit(bit, bh->b_data);
}
block = input->inode_table;
ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
block, sbi->s_itb_per_group);
err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
if (err)
goto exit_bh;
if ((err = extend_or_restart_transaction(handle, 2, bh)))
goto exit_bh;
mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data);
ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
bh->b_data);
ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
/* Mark unused entries in inode bitmap used */
@ -283,8 +263,8 @@ static int setup_new_group_blocks(struct super_block *sb,
goto exit_journal;
}
mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);
ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);
ext4_handle_dirty_metadata(handle, NULL, bh);
exit_bh:
brelse(bh);

View file

@ -40,6 +40,9 @@
#include <linux/crc16.h>
#include <asm/uaccess.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
@ -49,8 +52,11 @@
#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>
struct proc_dir_entry *ext4_proc_root;
static struct proc_dir_entry *ext4_proc_root;
static struct kset *ext4_kset;
struct ext4_lazy_init *ext4_li_info;
struct mutex ext4_li_mtx;
struct ext4_features *ext4_feat;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
@ -69,6 +75,8 @@ static void ext4_write_super(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static int ext4_get_sb(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data, struct vfsmount *mnt);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = {
@ -701,6 +709,7 @@ static void ext4_put_super(struct super_block *sb)
struct ext4_super_block *es = sbi->s_es;
int i, err;
ext4_unregister_li_request(sb);
dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
flush_workqueue(sbi->dio_unwritten_wq);
@ -717,6 +726,7 @@ static void ext4_put_super(struct super_block *sb)
ext4_abort(sb, "Couldn't clean up the journal");
}
del_timer(&sbi->s_err_report);
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
@ -1042,6 +1052,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
!(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
seq_puts(seq, ",block_validity");
if (!test_opt(sb, INIT_INODE_TABLE))
seq_puts(seq, ",noinit_inode_table");
else if (sbi->s_li_wait_mult)
seq_printf(seq, ",init_inode_table=%u",
(unsigned) sbi->s_li_wait_mult);
ext4_show_quota_options(seq, sb);
return 0;
@ -1170,6 +1186,7 @@ static const struct super_operations ext4_sops = {
.quota_write = ext4_quota_write,
#endif
.bdev_try_to_free_page = bdev_try_to_free_page,
.trim_fs = ext4_trim_fs
};
static const struct super_operations ext4_nojournal_sops = {
@ -1216,6 +1233,7 @@ enum {
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard,
Opt_init_inode_table, Opt_noinit_inode_table,
};
static const match_table_t tokens = {
@ -1286,6 +1304,9 @@ static const match_table_t tokens = {
{Opt_dioread_lock, "dioread_lock"},
{Opt_discard, "discard"},
{Opt_nodiscard, "nodiscard"},
{Opt_init_inode_table, "init_itable=%u"},
{Opt_init_inode_table, "init_itable"},
{Opt_noinit_inode_table, "noinit_itable"},
{Opt_err, NULL},
};
@ -1756,6 +1777,20 @@ set_qf_format:
case Opt_dioread_lock:
clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
break;
case Opt_init_inode_table:
set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
if (args[0].from) {
if (match_int(&args[0], &option))
return 0;
} else
option = EXT4_DEF_LI_WAIT_MULT;
if (option < 0)
return 0;
sbi->s_li_wait_mult = option;
break;
case Opt_noinit_inode_table:
clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
break;
default:
ext4_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" "
@ -1939,7 +1974,8 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
}
/* Called at mount-time, super-block is locked */
static int ext4_check_descriptors(struct super_block *sb)
static int ext4_check_descriptors(struct super_block *sb,
ext4_group_t *first_not_zeroed)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
@ -1948,7 +1984,7 @@ static int ext4_check_descriptors(struct super_block *sb)
ext4_fsblk_t inode_bitmap;
ext4_fsblk_t inode_table;
int flexbg_flag = 0;
ext4_group_t i;
ext4_group_t i, grp = sbi->s_groups_count;
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
flexbg_flag = 1;
@ -1964,6 +2000,10 @@ static int ext4_check_descriptors(struct super_block *sb)
last_block = first_block +
(EXT4_BLOCKS_PER_GROUP(sb) - 1);
if ((grp == sbi->s_groups_count) &&
!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
grp = i;
block_bitmap = ext4_block_bitmap(sb, gdp);
if (block_bitmap < first_block || block_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@ -2001,6 +2041,8 @@ static int ext4_check_descriptors(struct super_block *sb)
if (!flexbg_flag)
first_block += EXT4_BLOCKS_PER_GROUP(sb);
}
if (NULL != first_not_zeroed)
*first_not_zeroed = grp;
ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
@ -2373,6 +2415,7 @@ static struct ext4_attr ext4_attr_##_name = { \
#define EXT4_ATTR(name, mode, show, store) \
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
#define EXT4_RW_ATTR_SBI_UI(name, elname) \
@ -2409,6 +2452,16 @@ static struct attribute *ext4_attrs[] = {
NULL,
};
/* Features this copy of ext4 supports */
EXT4_INFO_ATTR(lazy_itable_init);
EXT4_INFO_ATTR(batched_discard);
static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(lazy_itable_init),
ATTR_LIST(batched_discard),
NULL,
};
static ssize_t ext4_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@ -2437,7 +2490,6 @@ static void ext4_sb_release(struct kobject *kobj)
complete(&sbi->s_kobj_unregister);
}
static const struct sysfs_ops ext4_attr_ops = {
.show = ext4_attr_show,
.store = ext4_attr_store,
@ -2449,6 +2501,17 @@ static struct kobj_type ext4_ktype = {
.release = ext4_sb_release,
};
static void ext4_feat_release(struct kobject *kobj)
{
complete(&ext4_feat->f_kobj_unregister);
}
static struct kobj_type ext4_feat_ktype = {
.default_attrs = ext4_feat_attrs,
.sysfs_ops = &ext4_attr_ops,
.release = ext4_feat_release,
};
/*
* Check whether this filesystem can be mounted based on
* the features present and the RDONLY/RDWR mount requested.
@ -2539,6 +2602,372 @@ static void print_daily_error_info(unsigned long arg)
mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
}
/*
 * Timer callback: @data carries a task_struct pointer; wake that task up.
 */
static void ext4_lazyinode_timeout(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}
/*
 * Find the next group, starting at elr->lr_next_group, whose descriptor
 * does not carry EXT4_BG_INODE_ZEROED and run ext4_init_inode_table() on
 * it.
 *
 * On the first run (lr_timeout == 0) the time spent in
 * ext4_init_inode_table() is measured and multiplied by s_li_wait_mult
 * (or by 20 when that is zero) to obtain the delay between runs.
 *
 * Returns 1 when a group descriptor cannot be read or no group is left,
 * otherwise the return value of ext4_init_inode_table().  The lazyinit
 * thread drops the request on any non-zero return.
 */
static int ext4_run_li_request(struct ext4_li_request *elr)
{
	struct super_block *sb = elr->lr_super;
	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
	ext4_group_t group = elr->lr_next_group;
	struct ext4_group_desc *desc;
	unsigned long elapsed;
	int ret;

	while (group < ngroups) {
		desc = ext4_get_group_desc(sb, group, NULL);
		if (!desc)
			return 1;

		if (!(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
			break;

		group++;
	}

	/* Every remaining group already has a zeroed inode table. */
	if (group == ngroups)
		return 1;

	elapsed = jiffies;
	ret = ext4_init_inode_table(sb, group,
				    elr->lr_timeout ? 0 : 1);

	if (elr->lr_timeout == 0) {
		/* First run: derive the wait time from the measured cost. */
		elapsed = jiffies - elapsed;
		if (elr->lr_sbi->s_li_wait_mult)
			elapsed *= elr->lr_sbi->s_li_wait_mult;
		else
			elapsed *= 20;
		elr->lr_timeout = elapsed;
	}

	elr->lr_next_sched = jiffies + elr->lr_timeout;
	elr->lr_next_group = group + 1;

	return ret;
}
/*
 * Unlink @elr from the request list, clear the s_li_request pointer of
 * the superblock info it belongs to and free the request structure.
 * A NULL @elr is silently ignored.
 *
 * Should be called with li_list_mtx held.
 */
static void ext4_remove_li_request(struct ext4_li_request *elr)
{
	if (elr) {
		struct ext4_sb_info *owner = elr->lr_sbi;

		list_del(&elr->lr_request);
		owner->s_li_request = NULL;
		kfree(elr);
	}
}
static void ext4_unregister_li_request(struct super_block *sb)
{
struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request;
if (!ext4_li_info)
return;
mutex_lock(&ext4_li_info->li_list_mtx);
ext4_remove_li_request(elr);
mutex_unlock(&ext4_li_info->li_list_mtx);
}
/*
* This is the function where ext4lazyinit thread lives. It walks
* through the request list searching for next scheduled filesystem.
* When such a fs is found, run the lazy initialization request
* (ext4_rn_li_request) and keep track of the time spend in this
* function. Based on that time we compute next schedule time of
* the request. When walking through the list is complete, compute
* next waking time and put itself into sleep.
*/
static int ext4_lazyinit_thread(void *arg)
{
struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
struct list_head *pos, *n;
struct ext4_li_request *elr;
unsigned long next_wakeup;
DEFINE_WAIT(wait);
int ret;
BUG_ON(NULL == eli);
eli->li_timer.data = (unsigned long)current;
eli->li_timer.function = ext4_lazyinode_timeout;
eli->li_task = current;
wake_up(&eli->li_wait_task);
cont_thread:
while (true) {
next_wakeup = MAX_JIFFY_OFFSET;
mutex_lock(&eli->li_list_mtx);
if (list_empty(&eli->li_request_list)) {
mutex_unlock(&eli->li_list_mtx);
goto exit_thread;
}
list_for_each_safe(pos, n, &eli->li_request_list) {
elr = list_entry(pos, struct ext4_li_request,
lr_request);
if (time_after_eq(jiffies, elr->lr_next_sched))
ret = ext4_run_li_request(elr);
if (ret) {
ret = 0;
ext4_remove_li_request(elr);
continue;
}
if (time_before(elr->lr_next_sched, next_wakeup))
next_wakeup = elr->lr_next_sched;
}
mutex_unlock(&eli->li_list_mtx);
if (freezing(current))
refrigerator();
if (time_after_eq(jiffies, next_wakeup)) {
cond_resched();
continue;
}
eli->li_timer.expires = next_wakeup;
add_timer(&eli->li_timer);
prepare_to_wait(&eli->li_wait_daemon, &wait,
TASK_INTERRUPTIBLE);
if (time_before(jiffies, next_wakeup))
schedule();
finish_wait(&eli->li_wait_daemon, &wait);
}
exit_thread:
/*
* It looks like the request list is empty, but we need
* to check it under the li_list_mtx lock, to prevent any
* additions into it, and of course we should lock ext4_li_mtx
* to atomically free the list and ext4_li_info, because at
* this point another ext4 filesystem could be registering
* new one.
*/
mutex_lock(&ext4_li_mtx);
mutex_lock(&eli->li_list_mtx);
if (!list_empty(&eli->li_request_list)) {
mutex_unlock(&eli->li_list_mtx);
mutex_unlock(&ext4_li_mtx);
goto cont_thread;
}
mutex_unlock(&eli->li_list_mtx);
del_timer_sync(&ext4_li_info->li_timer);
eli->li_task = NULL;
wake_up(&eli->li_wait_task);
kfree(ext4_li_info);
ext4_li_info = NULL;
mutex_unlock(&ext4_li_mtx);
return 0;
}
/*
 * Remove and free every request queued on ext4_li_info->li_request_list.
 *
 * The list is walked under li_list_mtx.  An empty list needs no special
 * case: list_for_each_safe() simply does not iterate, and the mutex is
 * released on the single exit path.  (An early return for the empty list
 * would leave li_list_mtx locked.)
 */
static void ext4_clear_request_list(void)
{
	struct list_head *pos, *n;
	struct ext4_li_request *elr;

	mutex_lock(&ext4_li_info->li_list_mtx);
	list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
		elr = list_entry(pos, struct ext4_li_request,
				 lr_request);
		ext4_remove_li_request(elr);
	}
	mutex_unlock(&ext4_li_info->li_list_mtx);
}
/*
 * Start the "ext4lazyinit" kernel thread on ext4_li_info and wait until
 * it has published itself in li_task.
 *
 * If the thread cannot be created, all queued requests are dropped,
 * ext4_li_info is freed and reset to NULL, and the error code from
 * kthread_run() is returned.  Returns 0 on success.
 */
static int ext4_run_lazyinit_thread(void)
{
	struct task_struct *task;
	int err;

	task = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit");
	if (!IS_ERR(task)) {
		ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
		wait_event(ext4_li_info->li_wait_task,
			   ext4_li_info->li_task != NULL);
		return 0;
	}

	/* Thread creation failed: tear the lazy-init state down again. */
	err = PTR_ERR(task);
	ext4_clear_request_list();
	del_timer_sync(&ext4_li_info->li_timer);
	kfree(ext4_li_info);
	ext4_li_info = NULL;
	printk(KERN_CRIT "EXT4: error %d creating inode table "
	       "initialization thread\n",
	       err);
	return err;
}
/*
 * Check whether it makes sense to run the itable init thread at all.
 *
 * Returns the number of the first group whose descriptor lacks
 * EXT4_BG_INODE_ZEROED.  Groups whose descriptor cannot be obtained are
 * skipped.  If no such group exists, the total number of groups is
 * returned.
 */
static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
{
	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
	ext4_group_t grp;

	for (grp = 0; grp < ngroups; grp++) {
		struct ext4_group_desc *desc;

		desc = ext4_get_group_desc(sb, grp, NULL);
		if (desc &&
		    !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
			break;
	}

	return grp;
}
/*
 * Allocate and initialise the global lazy-init bookkeeping structure and
 * publish it in ext4_li_info.  The new structure has an empty request
 * list, no thread (li_task == NULL) and EXT4_LAZYINIT_QUIT set in
 * li_state.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int ext4_li_info_new(void)
{
	struct ext4_lazy_init *info;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (info == NULL)
		return -ENOMEM;

	info->li_task = NULL;
	info->li_state |= EXT4_LAZYINIT_QUIT;

	INIT_LIST_HEAD(&info->li_request_list);
	mutex_init(&info->li_list_mtx);

	init_waitqueue_head(&info->li_wait_daemon);
	init_waitqueue_head(&info->li_wait_task);
	init_timer(&info->li_timer);

	ext4_li_info = info;

	return 0;
}
/*
 * Allocate a lazy-init request for @sb that will start with group
 * @start.  Returns the new request, or NULL if the allocation fails.
 */
static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
					    ext4_group_t start)
{
	struct ext4_li_request *req;
	unsigned long rnd;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (req == NULL)
		return NULL;

	req->lr_super = sb;
	req->lr_sbi = EXT4_SB(sb);
	req->lr_next_group = start;

	/*
	 * Pick a random first schedule time, somewhere within the next
	 * EXT4_DEF_LI_MAX_START_DELAY * HZ jiffies, so that the inode
	 * table initialization requests are spread out.
	 */
	get_random_bytes(&rnd, sizeof(rnd));
	req->lr_next_sched = jiffies + (unsigned long)rnd %
			     (EXT4_DEF_LI_MAX_START_DELAY * HZ);

	return req;
}
/*
 * Queue a lazy inode table initialization request for @sb, starting at
 * group @first_not_zeroed, and start the lazyinit thread if it is not
 * running yet.
 *
 * Nothing is queued (and 0 is returned) when @sb already has a request,
 * when @first_not_zeroed equals the number of groups, when the file
 * system is mounted read-only, or when the INIT_INODE_TABLE mount option
 * is not set.
 *
 * Returns 0 on success, -ENOMEM if the request or ext4_li_info cannot be
 * allocated, or the error reported by ext4_run_lazyinit_thread().
 */
static int ext4_register_li_request(struct super_block *sb,
				    ext4_group_t first_not_zeroed)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_li_request *elr;
	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
	/*
	 * Must start out as 0: when ext4_li_info already exists and the
	 * thread is already running, nothing below assigns it.
	 */
	int ret = 0;

	if (sbi->s_li_request != NULL)
		return 0;

	/* s_li_request is known to be NULL here, nothing to reset. */
	if (first_not_zeroed == ngroups ||
	    (sb->s_flags & MS_RDONLY) ||
	    !test_opt(sb, INIT_INODE_TABLE))
		return 0;

	elr = ext4_li_request_new(sb, first_not_zeroed);
	if (!elr)
		return -ENOMEM;

	mutex_lock(&ext4_li_mtx);

	if (NULL == ext4_li_info) {
		ret = ext4_li_info_new();
		if (ret)
			goto out;
	}

	mutex_lock(&ext4_li_info->li_list_mtx);
	list_add(&elr->lr_request, &ext4_li_info->li_request_list);
	mutex_unlock(&ext4_li_info->li_list_mtx);

	sbi->s_li_request = elr;
	/*
	 * The request now belongs to the list.  If starting the thread
	 * fails, ext4_run_lazyinit_thread() empties the list through
	 * ext4_clear_request_list(), which frees the request and clears
	 * sbi->s_li_request.  Forget our pointer so the error path below
	 * does not free it a second time.
	 */
	elr = NULL;

	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING))
		ret = ext4_run_lazyinit_thread();

out:
	mutex_unlock(&ext4_li_mtx);
	if (ret)
		kfree(elr);	/* no-op once the request was queued */
	return ret;
}
/*
 * Drop all pending lazy-init requests and wait for the lazyinit thread
 * to go away.
 *
 * We do not need to lock anything since this is called on
 * module unload.
 */
static void ext4_destroy_lazyinit_thread(void)
{
	/* The thread exited earlier: there is nothing left to be done. */
	if (ext4_li_info == NULL)
		return;

	ext4_clear_request_list();

	/* Kick the thread and wait until it has cleared li_task. */
	while (ext4_li_info->li_task != NULL) {
		wake_up(&ext4_li_info->li_wait_daemon);
		wait_event(ext4_li_info->li_wait_task,
			   ext4_li_info->li_task == NULL);
	}
}
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
__releases(kernel_lock)
__acquires(kernel_lock)
@ -2564,6 +2993,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
__u64 blocks_count;
int err;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
ext4_group_t first_not_zeroed;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
@ -2624,6 +3054,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
if (def_mount_opts & EXT4_DEFM_DEBUG)
set_opt(sbi->s_mount_opt, DEBUG);
if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
@ -2901,7 +3332,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount2;
}
}
if (!ext4_check_descriptors(sb)) {
if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
goto failed_mount2;
}
@ -3122,6 +3553,10 @@ no_journal:
goto failed_mount4;
}
err = ext4_register_li_request(sb, first_not_zeroed);
if (err)
goto failed_mount4;
sbi->s_kobj.kset = ext4_kset;
init_completion(&sbi->s_kobj_unregister);
err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
@ -3461,7 +3896,7 @@ static int ext4_load_journal(struct super_block *sb,
EXT4_SB(sb)->s_journal = journal;
ext4_clear_journal_err(sb, es);
if (journal_devnum &&
if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
es->s_journal_dev = cpu_to_le32(journal_devnum);
@ -3514,9 +3949,12 @@ static int ext4_commit_super(struct super_block *sb, int sync)
else
es->s_kbytes_written =
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeblocks_counter));
es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
es->s_free_inodes_count =
cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
sb->s_dirt = 0;
BUFFER_TRACE(sbh, "marking dirty");
@ -3835,6 +4273,19 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
enable_quota = 1;
}
}
/*
* Reinitialize lazy itable initialization thread based on
* current settings
*/
if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
ext4_unregister_li_request(sb);
else {
ext4_group_t first_not_zeroed;
first_not_zeroed = ext4_has_uninit_itable(sb);
ext4_register_li_request(sb, first_not_zeroed);
}
ext4_setup_system_zone(sb);
if (sbi->s_journal == NULL)
ext4_commit_super(sb, 1);
@ -4276,23 +4727,53 @@ static struct file_system_type ext4_fs_type = {
.fs_flags = FS_REQUIRES_DEV,
};
static int __init init_ext4_fs(void)
/*
 * Create the "features" kobject inside ext4_kset and remember it in
 * ext4_feat.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails, or the error
 * returned by kobject_init_and_add() (the allocation is freed again in
 * that case).
 */
int __init ext4_init_feat_adverts(void)
{
	struct ext4_features *feat;
	int err;

	feat = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
	if (feat == NULL)
		return -ENOMEM;

	feat->f_kobj.kset = ext4_kset;
	init_completion(&feat->f_kobj_unregister);

	err = kobject_init_and_add(&feat->f_kobj, &ext4_feat_ktype, NULL,
				   "features");
	if (err != 0) {
		kfree(feat);
		return err;
	}

	ext4_feat = feat;
	return 0;
}
static int __init ext4_init_fs(void)
{
int err;
ext4_check_flag_values();
err = init_ext4_system_zone();
err = ext4_init_pageio();
if (err)
return err;
err = ext4_init_system_zone();
if (err)
goto out5;
ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
if (!ext4_kset)
goto out4;
ext4_proc_root = proc_mkdir("fs/ext4", NULL);
err = init_ext4_mballoc();
err = ext4_init_feat_adverts();
err = ext4_init_mballoc();
if (err)
goto out3;
err = init_ext4_xattr();
err = ext4_init_xattr();
if (err)
goto out2;
err = init_inodecache();
@ -4303,38 +4784,46 @@ static int __init init_ext4_fs(void)
err = register_filesystem(&ext4_fs_type);
if (err)
goto out;
ext4_li_info = NULL;
mutex_init(&ext4_li_mtx);
return 0;
out:
unregister_as_ext2();
unregister_as_ext3();
destroy_inodecache();
out1:
exit_ext4_xattr();
ext4_exit_xattr();
out2:
exit_ext4_mballoc();
ext4_exit_mballoc();
out3:
kfree(ext4_feat);
remove_proc_entry("fs/ext4", NULL);
kset_unregister(ext4_kset);
out4:
exit_ext4_system_zone();
ext4_exit_system_zone();
out5:
ext4_exit_pageio();
return err;
}
static void __exit exit_ext4_fs(void)
static void __exit ext4_exit_fs(void)
{
ext4_destroy_lazyinit_thread();
unregister_as_ext2();
unregister_as_ext3();
unregister_filesystem(&ext4_fs_type);
destroy_inodecache();
exit_ext4_xattr();
exit_ext4_mballoc();
ext4_exit_xattr();
ext4_exit_mballoc();
remove_proc_entry("fs/ext4", NULL);
kset_unregister(ext4_kset);
exit_ext4_system_zone();
ext4_exit_system_zone();
ext4_exit_pageio();
}
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
module_init(init_ext4_fs)
module_exit(exit_ext4_fs)
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)

View file

@ -1588,7 +1588,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
#undef BLOCK_HASH_SHIFT
int __init
init_ext4_xattr(void)
ext4_init_xattr(void)
{
ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
if (!ext4_xattr_cache)
@ -1597,7 +1597,7 @@ init_ext4_xattr(void)
}
void
exit_ext4_xattr(void)
ext4_exit_xattr(void)
{
if (ext4_xattr_cache)
mb_cache_destroy(ext4_xattr_cache);

View file

@ -83,8 +83,8 @@ extern void ext4_xattr_put_super(struct super_block *);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
extern int init_ext4_xattr(void);
extern void exit_ext4_xattr(void);
extern int __init ext4_init_xattr(void);
extern void ext4_exit_xattr(void);
extern const struct xattr_handler *ext4_xattr_handlers[];
@ -121,14 +121,14 @@ ext4_xattr_put_super(struct super_block *sb)
{
}
static inline int
static __init inline int
init_ext4_xattr(void)
{
return 0;
}
static inline void
exit_ext4_xattr(void)
ext4_exit_xattr(void)
{
}

View file

@ -530,6 +530,41 @@ static int ioctl_fsthaw(struct file *filp)
return thaw_super(sb);
}
/*
 * FITRIM ioctl: hand a struct fstrim_range to the file system's
 * ->trim_fs() method.
 *
 * With a NULL @argp the range defaults to start 0, len ULLONG_MAX and
 * minlen 0; otherwise the range is copied in from user space and, after
 * a successful ->trim_fs() call, copied back out.
 *
 * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -EOPNOTSUPP if the
 * file system has no ->trim_fs(), -EINVAL if it has no block device,
 * -EFAULT if a user copy fails, or the negative value returned by
 * ->trim_fs().
 */
static int ioctl_fstrim(struct file *filp, void __user *argp)
{
	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
	struct fstrim_range range;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* If filesystem doesn't support trim feature, return. */
	if (!sb->s_op->trim_fs)
		return -EOPNOTSUPP;

	/* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
	if (!sb->s_bdev)
		return -EINVAL;

	if (argp != NULL) {
		if (copy_from_user(&range, argp, sizeof(range)))
			return -EFAULT;
	} else {
		range.start = 0;
		range.len = ULLONG_MAX;
		range.minlen = 0;
	}

	err = sb->s_op->trim_fs(sb, &range);
	if (err < 0)
		return err;

	if (argp != NULL && copy_to_user(argp, &range, sizeof(range)))
		return -EFAULT;

	return 0;
}
/*
* When you add any new common ioctls to the switches above and below
* please update compat_sys_ioctl() too.
@ -580,6 +615,10 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
error = ioctl_fsthaw(filp);
break;
case FITRIM:
error = ioctl_fstrim(filp, argp);
break;
case FS_IOC_FIEMAP:
return ioctl_fiemap(filp, arg);

View file

@ -299,6 +299,16 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
transaction->t_chp_stats.cs_forced_to_close++;
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
if (unlikely(journal->j_flags & JBD2_UNMOUNT))
/*
* The journal thread is dead; so starting and
* waiting for a commit to finish will cause
* us to wait for a _very_ long time.
*/
printk(KERN_ERR "JBD2: %s: "
"Waiting for Godot: block %llu\n",
journal->j_devname,
(unsigned long long) bh->b_blocknr);
jbd2_log_start_commit(journal, tid);
jbd2_log_wait_commit(journal, tid);
ret = 1;

View file

@ -26,7 +26,9 @@
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bitops.h>
#include <trace/events/jbd2.h>
#include <asm/system.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
@ -201,7 +203,7 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
mapping = jinode->i_vfs_inode->i_mapping;
jinode->i_flags |= JI_COMMIT_RUNNING;
set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
spin_unlock(&journal->j_list_lock);
/*
* submit the inode data buffers. We use writepage
@ -216,7 +218,8 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
commit_transaction->t_flushed_data_blocks = 1;
jinode->i_flags &= ~JI_COMMIT_RUNNING;
clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
smp_mb__after_clear_bit();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
spin_unlock(&journal->j_list_lock);
@ -237,7 +240,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
jinode->i_flags |= JI_COMMIT_RUNNING;
set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
spin_unlock(&journal->j_list_lock);
err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
if (err) {
@ -253,7 +256,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
ret = err;
}
spin_lock(&journal->j_list_lock);
jinode->i_flags &= ~JI_COMMIT_RUNNING;
clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
smp_mb__after_clear_bit();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}

View file

@ -42,12 +42,14 @@
#include <linux/log2.h>
#include <linux/vmalloc.h>
#include <linux/backing-dev.h>
#include <linux/bitops.h>
#define CREATE_TRACE_POINTS
#include <trace/events/jbd2.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/system.h>
EXPORT_SYMBOL(jbd2_journal_extend);
EXPORT_SYMBOL(jbd2_journal_stop);
@ -2210,7 +2212,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
restart:
spin_lock(&journal->j_list_lock);
/* Is commit writing out inode - we have to wait */
if (jinode->i_flags & JI_COMMIT_RUNNING) {
if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);

View file

@ -156,6 +156,7 @@ alloc_transaction:
*/
repeat:
read_lock(&journal->j_state_lock);
BUG_ON(journal->j_flags & JBD2_UNMOUNT);
if (is_journal_aborted(journal) ||
(journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
read_unlock(&journal->j_state_lock);

View file

@ -891,6 +891,14 @@ static inline int sb_issue_discard(struct super_block *sb, sector_t block,
nr_blocks << (sb->s_blocksize_bits - 9),
gfp_mask, flags);
}
/*
 * Call blkdev_issue_zeroout() on the block device of @sb for @nr_blocks
 * file system blocks starting at @block.  Both values are converted from
 * file system blocks to 512-byte units by shifting left by
 * (s_blocksize_bits - 9).  Returns whatever blkdev_issue_zeroout()
 * returns.
 */
static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
		sector_t nr_blocks, gfp_t gfp_mask)
{
	const int shift = sb->s_blocksize_bits - 9;

	return blkdev_issue_zeroout(sb->s_bdev,
				    block << shift,
				    nr_blocks << shift,
				    gfp_mask);
}
extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);

View file

@ -32,6 +32,12 @@
#define SEEK_END 2 /* seek relative to end of file */
#define SEEK_MAX SEEK_END
/*
 * Argument of the FITRIM ioctl.  It is copied in from user space, passed
 * to the file system's ->trim_fs() method and copied back to user space
 * afterwards.  When FITRIM is called without an argument the kernel uses
 * start = 0, len = ULLONG_MAX, minlen = 0.
 *
 * NOTE(review): the units of the three fields and what ->trim_fs() writes
 * back into them are defined by the file system implementation, not by
 * this header -- confirm against the ->trim_fs() implementations.
 */
struct fstrim_range {
uint64_t start;
uint64_t len;
uint64_t minlen;
};
/* And dynamically-tunable limits and defaults: */
struct files_stat_struct {
unsigned long nr_files; /* read only */
@ -317,6 +323,7 @@ struct inodes_stat_t {
#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
#define FITHAW _IOWR('X', 120, int) /* Thaw */
#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */
#define FS_IOC_GETFLAGS _IOR('f', 1, long)
#define FS_IOC_SETFLAGS _IOW('f', 2, long)
@ -1604,6 +1611,7 @@ struct super_operations {
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
#endif
int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
int (*trim_fs) (struct super_block *, struct fstrim_range *);
};
/*

View file

@ -395,7 +395,7 @@ struct jbd2_inode {
struct inode *i_vfs_inode;
/* Flags of inode [j_list_lock] */
unsigned int i_flags;
unsigned long i_flags;
};
struct jbd2_revoke_table_s;

View file

@ -78,6 +78,11 @@ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc)
return 1;
}
/*
 * Returns non-zero if the counters pointer of @fbc is set, 0 if it is
 * still NULL.
 */
static inline int percpu_counter_initialized(struct percpu_counter *fbc)
{
	return fbc->counters ? 1 : 0;
}
#else
struct percpu_counter {
@ -143,6 +148,11 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
return percpu_counter_read(fbc);
}
/*
 * !CONFIG_SMP variant: @fbc is not inspected, the counter is always
 * reported as initialized.
 */
static inline int percpu_counter_initialized(struct percpu_counter *fbc)
{
	return 1;
}
#endif /* CONFIG_SMP */
static inline void percpu_counter_inc(struct percpu_counter *fbc)

View file

@ -141,6 +141,8 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
int generic_writepages(struct address_space *mapping,
struct writeback_control *wbc);
void tag_pages_for_writeback(struct address_space *mapping,
pgoff_t start, pgoff_t end);
int write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage,
void *data);

View file

@ -21,7 +21,8 @@ TRACE_EVENT(ext4_free_inode,
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( umode_t, mode )
__field( uid_t, uid )
@ -30,7 +31,8 @@ TRACE_EVENT(ext4_free_inode,
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->uid = inode->i_uid;
@ -38,9 +40,10 @@ TRACE_EVENT(ext4_free_inode,
__entry->blocks = inode->i_blocks;
),
TP_printk("dev %s ino %lu mode 0%o uid %u gid %u blocks %llu",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->mode, __entry->uid, __entry->gid,
TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino, __entry->mode,
__entry->uid, __entry->gid,
(unsigned long long) __entry->blocks)
);
@ -50,20 +53,22 @@ TRACE_EVENT(ext4_request_inode,
TP_ARGS(dir, mode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, dir )
__field( umode_t, mode )
),
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->dev_major = MAJOR(dir->i_sb->s_dev);
__entry->dev_minor = MINOR(dir->i_sb->s_dev);
__entry->dir = dir->i_ino;
__entry->mode = mode;
),
TP_printk("dev %s dir %lu mode 0%o",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->dir,
__entry->mode)
TP_printk("dev %d,%d dir %lu mode 0%o",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->dir, __entry->mode)
);
TRACE_EVENT(ext4_allocate_inode,
@ -72,21 +77,24 @@ TRACE_EVENT(ext4_allocate_inode,
TP_ARGS(inode, dir, mode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( ino_t, dir )
__field( umode_t, mode )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->dir = dir->i_ino;
__entry->mode = mode;
),
TP_printk("dev %s ino %lu dir %lu mode 0%o",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino,
(unsigned long) __entry->dir, __entry->mode)
);
@ -98,7 +106,8 @@ DECLARE_EVENT_CLASS(ext4__write_begin,
TP_ARGS(inode, pos, len, flags),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
@ -106,15 +115,17 @@ DECLARE_EVENT_CLASS(ext4__write_begin,
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->flags = flags;
),
TP_printk("dev %s ino %lu pos %llu len %u flags %u",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino,
__entry->pos, __entry->len, __entry->flags)
);
@ -141,7 +152,8 @@ DECLARE_EVENT_CLASS(ext4__write_end,
TP_ARGS(inode, pos, len, copied),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
@ -149,16 +161,18 @@ DECLARE_EVENT_CLASS(ext4__write_end,
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->copied = copied;
),
TP_printk("dev %s ino %lu pos %llu len %u copied %u",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->pos, __entry->len, __entry->copied)
TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino, __entry->pos,
__entry->len, __entry->copied)
);
DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end,
@ -199,21 +213,23 @@ TRACE_EVENT(ext4_writepage,
TP_ARGS(inode, page),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( pgoff_t, index )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->index = page->index;
),
TP_printk("dev %s ino %lu page_index %lu",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->index)
TP_printk("dev %d,%d ino %lu page_index %lu",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino, __entry->index)
);
TRACE_EVENT(ext4_da_writepages,
@ -222,13 +238,13 @@ TRACE_EVENT(ext4_da_writepages,
TP_ARGS(inode, wbc),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( long, nr_to_write )
__field( long, pages_skipped )
__field( loff_t, range_start )
__field( loff_t, range_end )
__field( char, nonblocking )
__field( char, for_kupdate )
__field( char, for_reclaim )
__field( char, range_cyclic )
@ -236,7 +252,8 @@ TRACE_EVENT(ext4_da_writepages,
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->nr_to_write = wbc->nr_to_write;
__entry->pages_skipped = wbc->pages_skipped;
@ -248,11 +265,11 @@ TRACE_EVENT(ext4_da_writepages,
__entry->writeback_index = inode->i_mapping->writeback_index;
),
TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld "
TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld "
"range_start %llu range_end %llu "
"for_kupdate %d for_reclaim %d "
"range_cyclic %d writeback_index %lu",
jbd2_dev_to_name(__entry->dev),
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino, __entry->nr_to_write,
__entry->pages_skipped, __entry->range_start,
__entry->range_end,
@ -267,7 +284,8 @@ TRACE_EVENT(ext4_da_write_pages,
TP_ARGS(inode, mpd),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( __u64, b_blocknr )
__field( __u32, b_size )
@ -278,7 +296,8 @@ TRACE_EVENT(ext4_da_write_pages,
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->b_blocknr = mpd->b_blocknr;
__entry->b_size = mpd->b_size;
@ -288,8 +307,9 @@ TRACE_EVENT(ext4_da_write_pages,
__entry->pages_written = mpd->pages_written;
),
TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino,
__entry->b_blocknr, __entry->b_size,
__entry->b_state, __entry->first_page,
__entry->io_done, __entry->pages_written)
@ -302,7 +322,8 @@ TRACE_EVENT(ext4_da_writepages_result,
TP_ARGS(inode, wbc, ret, pages_written),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( int, ret )
__field( int, pages_written )
@ -312,7 +333,8 @@ TRACE_EVENT(ext4_da_writepages_result,
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->ret = ret;
__entry->pages_written = pages_written;
@ -321,8 +343,8 @@ TRACE_EVENT(ext4_da_writepages_result,
__entry->writeback_index = inode->i_mapping->writeback_index;
),
TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld more_io %d writeback_index %lu",
jbd2_dev_to_name(__entry->dev),
TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld more_io %d writeback_index %lu",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino, __entry->ret,
__entry->pages_written, __entry->pages_skipped,
__entry->more_io,
@ -336,20 +358,23 @@ TRACE_EVENT(ext4_discard_blocks,
TP_ARGS(sb, blk, count),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( __u64, blk )
__field( __u64, count )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->dev_major = MAJOR(sb->s_dev);
__entry->dev_minor = MINOR(sb->s_dev);
__entry->blk = blk;
__entry->count = count;
),
TP_printk("dev %s blk %llu count %llu",
jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count)
TP_printk("dev %d,%d blk %llu count %llu",
__entry->dev_major, __entry->dev_minor,
__entry->blk, __entry->count)
);
DECLARE_EVENT_CLASS(ext4__mb_new_pa,
@ -359,7 +384,8 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa,
TP_ARGS(ac, pa),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( __u64, pa_pstart )
__field( __u32, pa_len )
@ -368,16 +394,18 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa,
),
TP_fast_assign(
__entry->dev = ac->ac_sb->s_dev;
__entry->dev_major = MAJOR(ac->ac_sb->s_dev);
__entry->dev_minor = MINOR(ac->ac_sb->s_dev);
__entry->ino = ac->ac_inode->i_ino;
__entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len;
__entry->pa_lstart = pa->pa_lstart;
),
TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->pa_pstart, __entry->pa_len, __entry->pa_lstart)
TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino, __entry->pa_pstart,
__entry->pa_len, __entry->pa_lstart)
);
DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa,
@ -398,14 +426,15 @@ DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa,
TRACE_EVENT(ext4_mb_release_inode_pa,
TP_PROTO(struct super_block *sb,
struct ext4_allocation_context *ac,
struct inode *inode,
struct ext4_prealloc_space *pa,
unsigned long long block, unsigned int count),
TP_ARGS(sb, ac, pa, block, count),
TP_ARGS(sb, inode, pa, block, count),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( __u64, block )
__field( __u32, count )
@ -413,43 +442,42 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->ino = (ac && ac->ac_inode) ?
ac->ac_inode->i_ino : 0;
__entry->dev_major = MAJOR(sb->s_dev);
__entry->dev_minor = MINOR(sb->s_dev);
__entry->ino = inode->i_ino;
__entry->block = block;
__entry->count = count;
),
TP_printk("dev %s ino %lu block %llu count %u",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->block, __entry->count)
TP_printk("dev %d,%d ino %lu block %llu count %u",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino, __entry->block, __entry->count)
);
/*
 * ext4_mb_release_group_pa - trace a preallocation space on a superblock.
 *
 * @sb: superblock; only sb->s_dev is read, split into major/minor numbers.
 * @pa: preallocation space; pa_pstart and pa_len are recorded.
 *
 * The device is stored as separate major/minor integers and printed as
 * "dev %d,%d", so no dev_t field and no jbd2_dev_to_name() lookup are
 * needed.  The allocation context argument and the "ino" field derived
 * from it are gone: nothing in the output line used them.
 */
TRACE_EVENT(ext4_mb_release_group_pa,
	TP_PROTO(struct super_block *sb,
		 struct ext4_prealloc_space *pa),

	TP_ARGS(sb, pa),

	TP_STRUCT__entry(
		__field(	int,	dev_major		)
		__field(	int,	dev_minor		)
		__field(	__u64,	pa_pstart		)
		__field(	__u32,	pa_len			)
	),

	TP_fast_assign(
		__entry->dev_major	= MAJOR(sb->s_dev);
		__entry->dev_minor	= MINOR(sb->s_dev);
		__entry->pa_pstart	= pa->pa_pstart;
		__entry->pa_len		= pa->pa_len;
	),

	TP_printk("dev %d,%d pstart %llu len %u",
		  __entry->dev_major, __entry->dev_minor,
		  __entry->pa_pstart, __entry->pa_len)
);
TRACE_EVENT(ext4_discard_preallocations,
@ -458,18 +486,21 @@ TRACE_EVENT(ext4_discard_preallocations,
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
),
TP_printk("dev %s ino %lu",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino)
TP_printk("dev %d,%d ino %lu",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino)
);
TRACE_EVENT(ext4_mb_discard_preallocations,
@ -478,18 +509,20 @@ TRACE_EVENT(ext4_mb_discard_preallocations,
TP_ARGS(sb, needed),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( int, needed )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->dev_major = MAJOR(sb->s_dev);
__entry->dev_minor = MINOR(sb->s_dev);
__entry->needed = needed;
),
TP_printk("dev %s needed %d",
jbd2_dev_to_name(__entry->dev), __entry->needed)
TP_printk("dev %d,%d needed %d",
__entry->dev_major, __entry->dev_minor, __entry->needed)
);
TRACE_EVENT(ext4_request_blocks,
@ -498,7 +531,8 @@ TRACE_EVENT(ext4_request_blocks,
TP_ARGS(ar),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( unsigned int, flags )
__field( unsigned int, len )
@ -511,7 +545,8 @@ TRACE_EVENT(ext4_request_blocks,
),
TP_fast_assign(
__entry->dev = ar->inode->i_sb->s_dev;
__entry->dev_major = MAJOR(ar->inode->i_sb->s_dev);
__entry->dev_minor = MINOR(ar->inode->i_sb->s_dev);
__entry->ino = ar->inode->i_ino;
__entry->flags = ar->flags;
__entry->len = ar->len;
@ -523,8 +558,9 @@ TRACE_EVENT(ext4_request_blocks,
__entry->pright = ar->pright;
),
TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino,
__entry->flags, __entry->len,
(unsigned long long) __entry->logical,
(unsigned long long) __entry->goal,
@ -540,7 +576,8 @@ TRACE_EVENT(ext4_allocate_blocks,
TP_ARGS(ar, block),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( __u64, block )
__field( unsigned int, flags )
@ -554,7 +591,8 @@ TRACE_EVENT(ext4_allocate_blocks,
),
TP_fast_assign(
__entry->dev = ar->inode->i_sb->s_dev;
__entry->dev_major = MAJOR(ar->inode->i_sb->s_dev);
__entry->dev_minor = MINOR(ar->inode->i_sb->s_dev);
__entry->ino = ar->inode->i_ino;
__entry->block = block;
__entry->flags = ar->flags;
@ -567,9 +605,10 @@ TRACE_EVENT(ext4_allocate_blocks,
__entry->pright = ar->pright;
),
TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->flags, __entry->len, __entry->block,
TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino, __entry->flags,
__entry->len, __entry->block,
(unsigned long long) __entry->logical,
(unsigned long long) __entry->goal,
(unsigned long long) __entry->lleft,
@ -585,7 +624,8 @@ TRACE_EVENT(ext4_free_blocks,
TP_ARGS(inode, block, count, flags),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, block )
@ -594,7 +634,8 @@ TRACE_EVENT(ext4_free_blocks,
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->block = block;
@ -602,8 +643,9 @@ TRACE_EVENT(ext4_free_blocks,
__entry->flags = flags;
),
TP_printk("dev %s ino %lu mode 0%o block %llu count %lu flags %d",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %d",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino,
__entry->mode, __entry->block, __entry->count,
__entry->flags)
);
@ -614,7 +656,8 @@ TRACE_EVENT(ext4_sync_file,
TP_ARGS(file, datasync),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( ino_t, parent )
__field( int, datasync )
@ -623,14 +666,16 @@ TRACE_EVENT(ext4_sync_file,
TP_fast_assign(
struct dentry *dentry = file->f_path.dentry;
__entry->dev = dentry->d_inode->i_sb->s_dev;
__entry->dev_major = MAJOR(dentry->d_inode->i_sb->s_dev);
__entry->dev_minor = MINOR(dentry->d_inode->i_sb->s_dev);
__entry->ino = dentry->d_inode->i_ino;
__entry->datasync = datasync;
__entry->parent = dentry->d_parent->d_inode->i_ino;
),
TP_printk("dev %s ino %ld parent %ld datasync %d ",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
TP_printk("dev %d,%d ino %ld parent %ld datasync %d ",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino,
(unsigned long) __entry->parent, __entry->datasync)
);
@ -640,18 +685,20 @@ TRACE_EVENT(ext4_sync_fs,
TP_ARGS(sb, wait),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( int, wait )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->dev_major = MAJOR(sb->s_dev);
__entry->dev_minor = MINOR(sb->s_dev);
__entry->wait = wait;
),
TP_printk("dev %s wait %d", jbd2_dev_to_name(__entry->dev),
__entry->wait)
TP_printk("dev %d,%d wait %d", __entry->dev_major,
__entry->dev_minor, __entry->wait)
);
TRACE_EVENT(ext4_alloc_da_blocks,
@ -660,21 +707,24 @@ TRACE_EVENT(ext4_alloc_da_blocks,
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( unsigned int, data_blocks )
__field( unsigned int, meta_blocks )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
__entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
),
TP_printk("dev %s ino %lu data_blocks %u meta_blocks %u",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
TP_printk("dev %d,%d ino %lu data_blocks %u meta_blocks %u",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino,
__entry->data_blocks, __entry->meta_blocks)
);
@ -684,7 +734,8 @@ TRACE_EVENT(ext4_mballoc_alloc,
TP_ARGS(ac),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( __u16, found )
__field( __u16, groups )
@ -707,7 +758,8 @@ TRACE_EVENT(ext4_mballoc_alloc,
),
TP_fast_assign(
__entry->dev = ac->ac_inode->i_sb->s_dev;
__entry->dev_major = MAJOR(ac->ac_inode->i_sb->s_dev);
__entry->dev_minor = MINOR(ac->ac_inode->i_sb->s_dev);
__entry->ino = ac->ac_inode->i_ino;
__entry->found = ac->ac_found;
__entry->flags = ac->ac_flags;
@ -729,10 +781,11 @@ TRACE_EVENT(ext4_mballoc_alloc,
__entry->result_len = ac->ac_f_ex.fe_len;
),
TP_printk("dev %s inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
"result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x "
"tail %u broken %u",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino,
__entry->orig_group, __entry->orig_start,
__entry->orig_len, __entry->orig_logical,
__entry->goal_group, __entry->goal_start,
@ -750,7 +803,8 @@ TRACE_EVENT(ext4_mballoc_prealloc,
TP_ARGS(ac),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( __u32, orig_logical )
__field( int, orig_start )
@ -763,7 +817,8 @@ TRACE_EVENT(ext4_mballoc_prealloc,
),
TP_fast_assign(
__entry->dev = ac->ac_inode->i_sb->s_dev;
__entry->dev_major = MAJOR(ac->ac_inode->i_sb->s_dev);
__entry->dev_minor = MINOR(ac->ac_inode->i_sb->s_dev);
__entry->ino = ac->ac_inode->i_ino;
__entry->orig_logical = ac->ac_o_ex.fe_logical;
__entry->orig_start = ac->ac_o_ex.fe_start;
@ -775,8 +830,9 @@ TRACE_EVENT(ext4_mballoc_prealloc,
__entry->result_len = ac->ac_b_ex.fe_len;
),
TP_printk("dev %s inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino,
__entry->orig_group, __entry->orig_start,
__entry->orig_len, __entry->orig_logical,
__entry->result_group, __entry->result_start,
@ -784,46 +840,59 @@ TRACE_EVENT(ext4_mballoc_prealloc,
);
/*
 * ext4__mballoc - event class shared by ext4_mballoc_discard and
 * ext4_mballoc_free.
 *
 * @sb:    superblock; only sb->s_dev is read, split into major/minor.
 * @inode: inode the extent belongs to; may be NULL, in which case the
 *         inode number is recorded as 0.
 * @group: stored as result_group.
 * @start: stored as result_start.
 * @len:   stored as result_len.
 *
 * The class takes the extent as explicit arguments instead of an
 * allocation context, so nothing here dereferences "ac".  The logical
 * block is not passed in, so the result_logical field and the trailing
 * "@%u" in the output line are dropped.
 */
DECLARE_EVENT_CLASS(ext4__mballoc,
	TP_PROTO(struct super_block *sb,
		 struct inode *inode,
		 ext4_group_t group,
		 ext4_grpblk_t start,
		 ext4_grpblk_t len),

	TP_ARGS(sb, inode, group, start, len),

	TP_STRUCT__entry(
		__field(	int,	dev_major		)
		__field(	int,	dev_minor		)
		__field(	ino_t,	ino			)
		__field(	int,	result_start		)
		__field(	__u32,	result_group		)
		__field(	int,	result_len		)
	),

	TP_fast_assign(
		__entry->dev_major	= MAJOR(sb->s_dev);
		__entry->dev_minor	= MINOR(sb->s_dev);
		/* inode is optional; record 0 when the caller has none. */
		__entry->ino		= inode ? inode->i_ino : 0;
		__entry->result_start	= start;
		__entry->result_group	= group;
		__entry->result_len	= len;
	),

	TP_printk("dev %d,%d inode %lu extent %u/%d/%u ",
		  __entry->dev_major, __entry->dev_minor,
		  (unsigned long) __entry->ino,
		  __entry->result_group, __entry->result_start,
		  __entry->result_len)
);

/* Instance of ext4__mballoc; prototype must match the class exactly. */
DEFINE_EVENT(ext4__mballoc, ext4_mballoc_discard,

	TP_PROTO(struct super_block *sb,
		 struct inode *inode,
		 ext4_group_t group,
		 ext4_grpblk_t start,
		 ext4_grpblk_t len),

	TP_ARGS(sb, inode, group, start, len)
);

/* Instance of ext4__mballoc; prototype must match the class exactly. */
DEFINE_EVENT(ext4__mballoc, ext4_mballoc_free,

	TP_PROTO(struct super_block *sb,
		 struct inode *inode,
		 ext4_group_t group,
		 ext4_grpblk_t start,
		 ext4_grpblk_t len),

	TP_ARGS(sb, inode, group, start, len)
);
TRACE_EVENT(ext4_forget,
@ -832,7 +901,8 @@ TRACE_EVENT(ext4_forget,
TP_ARGS(inode, is_metadata, block),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( umode_t, mode )
__field( int, is_metadata )
@ -840,16 +910,18 @@ TRACE_EVENT(ext4_forget,
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->is_metadata = is_metadata;
__entry->block = block;
),
TP_printk("dev %s ino %lu mode 0%o is_metadata %d block %llu",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->mode, __entry->is_metadata, __entry->block)
TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino, __entry->mode,
__entry->is_metadata, __entry->block)
);
TRACE_EVENT(ext4_da_update_reserve_space,
@ -858,7 +930,8 @@ TRACE_EVENT(ext4_da_update_reserve_space,
TP_ARGS(inode, used_blocks),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, i_blocks )
@ -869,7 +942,8 @@ TRACE_EVENT(ext4_da_update_reserve_space,
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
@ -879,9 +953,10 @@ TRACE_EVENT(ext4_da_update_reserve_space,
__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
),
TP_printk("dev %s ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->mode, (unsigned long long) __entry->i_blocks,
TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino, __entry->mode,
(unsigned long long) __entry->i_blocks,
__entry->used_blocks, __entry->reserved_data_blocks,
__entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
);
@ -892,7 +967,8 @@ TRACE_EVENT(ext4_da_reserve_space,
TP_ARGS(inode, md_needed),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, i_blocks )
@ -902,7 +978,8 @@ TRACE_EVENT(ext4_da_reserve_space,
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
@ -911,8 +988,9 @@ TRACE_EVENT(ext4_da_reserve_space,
__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
),
TP_printk("dev %s ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino,
__entry->mode, (unsigned long long) __entry->i_blocks,
__entry->md_needed, __entry->reserved_data_blocks,
__entry->reserved_meta_blocks)
@ -924,7 +1002,8 @@ TRACE_EVENT(ext4_da_release_space,
TP_ARGS(inode, freed_blocks),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, i_blocks )
@ -935,7 +1014,8 @@ TRACE_EVENT(ext4_da_release_space,
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
@ -945,8 +1025,9 @@ TRACE_EVENT(ext4_da_release_space,
__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
),
TP_printk("dev %s ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino,
__entry->mode, (unsigned long long) __entry->i_blocks,
__entry->freed_blocks, __entry->reserved_data_blocks,
__entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
@ -958,18 +1039,20 @@ DECLARE_EVENT_CLASS(ext4__bitmap_load,
TP_ARGS(sb, group),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( __u32, group )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->dev_major = MAJOR(sb->s_dev);
__entry->dev_minor = MINOR(sb->s_dev);
__entry->group = group;
),
TP_printk("dev %s group %u",
jbd2_dev_to_name(__entry->dev), __entry->group)
TP_printk("dev %d,%d group %u",
__entry->dev_major, __entry->dev_minor, __entry->group)
);
DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load,

View file

@ -17,17 +17,19 @@ TRACE_EVENT(jbd2_checkpoint,
TP_ARGS(journal, result),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( int, result )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev);
__entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev);
__entry->result = result;
),
TP_printk("dev %s result %d",
jbd2_dev_to_name(__entry->dev), __entry->result)
TP_printk("dev %d,%d result %d",
__entry->dev_major, __entry->dev_minor, __entry->result)
);
DECLARE_EVENT_CLASS(jbd2_commit,
@ -37,20 +39,22 @@ DECLARE_EVENT_CLASS(jbd2_commit,
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( char, sync_commit )
__field( int, transaction )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev);
__entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev);
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
),
TP_printk("dev %s transaction %d sync %d",
jbd2_dev_to_name(__entry->dev), __entry->transaction,
__entry->sync_commit)
TP_printk("dev %d,%d transaction %d sync %d",
__entry->dev_major, __entry->dev_minor,
__entry->transaction, __entry->sync_commit)
);
DEFINE_EVENT(jbd2_commit, jbd2_start_commit,
@ -87,22 +91,24 @@ TRACE_EVENT(jbd2_end_commit,
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( char, sync_commit )
__field( int, transaction )
__field( int, head )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev);
__entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev);
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
__entry->head = journal->j_tail_sequence;
),
TP_printk("dev %s transaction %d sync %d head %d",
jbd2_dev_to_name(__entry->dev), __entry->transaction,
__entry->sync_commit, __entry->head)
TP_printk("dev %d,%d transaction %d sync %d head %d",
__entry->dev_major, __entry->dev_minor,
__entry->transaction, __entry->sync_commit, __entry->head)
);
TRACE_EVENT(jbd2_submit_inode_data,
@ -111,17 +117,20 @@ TRACE_EVENT(jbd2_submit_inode_data,
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( ino_t, ino )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev_major = MAJOR(inode->i_sb->s_dev);
__entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
),
TP_printk("dev %s ino %lu",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino)
TP_printk("dev %d,%d ino %lu",
__entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino)
);
TRACE_EVENT(jbd2_run_stats,
@ -131,7 +140,8 @@ TRACE_EVENT(jbd2_run_stats,
TP_ARGS(dev, tid, stats),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( unsigned long, tid )
__field( unsigned long, wait )
__field( unsigned long, running )
@ -144,7 +154,8 @@ TRACE_EVENT(jbd2_run_stats,
),
TP_fast_assign(
__entry->dev = dev;
__entry->dev_major = MAJOR(dev);
__entry->dev_minor = MINOR(dev);
__entry->tid = tid;
__entry->wait = stats->rs_wait;
__entry->running = stats->rs_running;
@ -156,9 +167,9 @@ TRACE_EVENT(jbd2_run_stats,
__entry->blocks_logged = stats->rs_blocks_logged;
),
TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u "
TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u "
"logging %u handle_count %u blocks %u blocks_logged %u",
jbd2_dev_to_name(__entry->dev), __entry->tid,
__entry->dev_major, __entry->dev_minor, __entry->tid,
jiffies_to_msecs(__entry->wait),
jiffies_to_msecs(__entry->running),
jiffies_to_msecs(__entry->locked),
@ -175,7 +186,8 @@ TRACE_EVENT(jbd2_checkpoint_stats,
TP_ARGS(dev, tid, stats),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( unsigned long, tid )
__field( unsigned long, chp_time )
__field( __u32, forced_to_close )
@ -184,7 +196,8 @@ TRACE_EVENT(jbd2_checkpoint_stats,
),
TP_fast_assign(
__entry->dev = dev;
__entry->dev_major = MAJOR(dev);
__entry->dev_minor = MINOR(dev);
__entry->tid = tid;
__entry->chp_time = stats->cs_chp_time;
__entry->forced_to_close= stats->cs_forced_to_close;
@ -192,9 +205,9 @@ TRACE_EVENT(jbd2_checkpoint_stats,
__entry->dropped = stats->cs_dropped;
),
TP_printk("dev %s tid %lu chp_time %u forced_to_close %u "
TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u "
"written %u dropped %u",
jbd2_dev_to_name(__entry->dev), __entry->tid,
__entry->dev_major, __entry->dev_minor, __entry->tid,
jiffies_to_msecs(__entry->chp_time),
__entry->forced_to_close, __entry->written, __entry->dropped)
);
@ -207,7 +220,8 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
TP_ARGS(journal, first_tid, block_nr, freed),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, dev_major )
__field( int, dev_minor )
__field( tid_t, tail_sequence )
__field( tid_t, first_tid )
__field(unsigned long, block_nr )
@ -215,16 +229,18 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev);
__entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev);
__entry->tail_sequence = journal->j_tail_sequence;
__entry->first_tid = first_tid;
__entry->block_nr = block_nr;
__entry->freed = freed;
),
TP_printk("dev %s from %u to %u offset %lu freed %lu",
jbd2_dev_to_name(__entry->dev), __entry->tail_sequence,
__entry->first_tid, __entry->block_nr, __entry->freed)
TP_printk("dev %d,%d from %u to %u offset %lu freed %lu",
__entry->dev_major, __entry->dev_minor,
__entry->tail_sequence, __entry->first_tid,
__entry->block_nr, __entry->freed)
);
#endif /* _TRACE_JBD2_H */