diff --git a/fs/inode.c b/fs/inode.c index ef362364d396..b153aeaa61ea 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -346,9 +346,8 @@ void inc_nlink(struct inode *inode) } EXPORT_SYMBOL(inc_nlink); -void address_space_init_once(struct address_space *mapping) +static void __address_space_init_once(struct address_space *mapping) { - memset(mapping, 0, sizeof(*mapping)); INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT); spin_lock_init(&mapping->tree_lock); init_rwsem(&mapping->i_mmap_rwsem); @@ -356,6 +355,12 @@ void address_space_init_once(struct address_space *mapping) spin_lock_init(&mapping->private_lock); mapping->i_mmap = RB_ROOT_CACHED; } + +void address_space_init_once(struct address_space *mapping) +{ + memset(mapping, 0, sizeof(*mapping)); + __address_space_init_once(mapping); +} EXPORT_SYMBOL(address_space_init_once); /* @@ -371,7 +376,7 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->i_io_list); INIT_LIST_HEAD(&inode->i_wb_list); INIT_LIST_HEAD(&inode->i_lru); - address_space_init_once(&inode->i_data); + __address_space_init_once(&inode->i_data); i_size_ordered_init(inode); } EXPORT_SYMBOL(inode_init_once); @@ -1533,7 +1538,6 @@ retry: if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) { if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) { atomic_inc(&inode->i_count); - inode->i_state &= ~I_DIRTY_TIME; spin_unlock(&inode->i_lock); trace_writeback_lazytime_iput(inode); mark_inode_dirty_sync(inode); diff --git a/fs/sync.c b/fs/sync.c index 9908a114d506..b54e0541ad89 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -192,12 +192,8 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) if (!file->f_op->fsync) return -EINVAL; - if (!datasync && (inode->i_state & I_DIRTY_TIME)) { - spin_lock(&inode->i_lock); - inode->i_state &= ~I_DIRTY_TIME; - spin_unlock(&inode->i_lock); + if (!datasync && (inode->i_state & I_DIRTY_TIME)) mark_inode_dirty_sync(inode); - } return file->f_op->fsync(file, start, end, datasync); } EXPORT_SYMBOL(vfs_fsync_range); diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 393b6849aeb3..7bace03dc9dc 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -46,13 +46,13 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) } void * -kmem_zalloc_large(size_t size, xfs_km_flags_t flags) +kmem_alloc_large(size_t size, xfs_km_flags_t flags) { unsigned nofs_flag = 0; void *ptr; gfp_t lflags; - ptr = kmem_zalloc(size, flags | KM_MAYFAIL); + ptr = kmem_alloc(size, flags | KM_MAYFAIL); if (ptr) return ptr; @@ -67,7 +67,7 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags) nofs_flag = memalloc_nofs_save(); lflags = kmem_flags_convert(flags); - ptr = __vmalloc(size, lflags | __GFP_ZERO, PAGE_KERNEL); + ptr = __vmalloc(size, lflags, PAGE_KERNEL); if (flags & KM_NOFS) memalloc_nofs_restore(nofs_flag); diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 4b87472f35bc..6023b594ead7 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -71,7 +71,7 @@ kmem_flags_convert(xfs_km_flags_t flags) } extern void *kmem_alloc(size_t, xfs_km_flags_t); -extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t); +extern void *kmem_alloc_large(size_t size, xfs_km_flags_t); extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t); static inline void kmem_free(const void *ptr) { @@ -85,6 +85,12 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags) return kmem_alloc(size, flags | KM_ZERO); } +static inline void * +kmem_zalloc_large(size_t size, xfs_km_flags_t flags) +{ + return kmem_alloc_large(size, flags | KM_ZERO); +} + /* * Zone interfaces */ diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 2291f4224e24..03885a968de8 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -95,13 +95,13 @@ xfs_ag_resv_critical( switch (type) { case XFS_AG_RESV_METADATA: - avail = pag->pagf_freeblks - pag->pag_agfl_resv.ar_reserved; + avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved; orig = pag->pag_meta_resv.ar_asked; break; - case XFS_AG_RESV_AGFL: + case XFS_AG_RESV_RMAPBT: avail = pag->pagf_freeblks + pag->pagf_flcount - pag->pag_meta_resv.ar_reserved; - orig = pag->pag_agfl_resv.ar_asked; + orig = pag->pag_rmapbt_resv.ar_asked; break; default: ASSERT(0); @@ -126,10 +126,10 @@ xfs_ag_resv_needed( { xfs_extlen_t len; - len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved; + len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved; switch (type) { case XFS_AG_RESV_METADATA: - case XFS_AG_RESV_AGFL: + case XFS_AG_RESV_RMAPBT: len -= xfs_perag_resv(pag, type)->ar_reserved; break; case XFS_AG_RESV_NONE: @@ -160,10 +160,11 @@ __xfs_ag_resv_free( if (pag->pag_agno == 0) pag->pag_mount->m_ag_max_usable += resv->ar_asked; /* - * AGFL blocks are always considered "free", so whatever - * was reserved at mount time must be given back at umount. + * RMAPBT blocks come from the AGFL and AGFL blocks are always + * considered "free", so whatever was reserved at mount time must be + * given back at umount. */ - if (type == XFS_AG_RESV_AGFL) + if (type == XFS_AG_RESV_RMAPBT) oldresv = resv->ar_orig_reserved; else oldresv = resv->ar_reserved; @@ -185,7 +186,7 @@ xfs_ag_resv_free( int error; int err2; - error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL); + error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT); err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA); if (err2 && !error) error = err2; @@ -284,15 +285,15 @@ xfs_ag_resv_init( } } - /* Create the AGFL metadata reservation */ - if (pag->pag_agfl_resv.ar_asked == 0) { + /* Create the RMAPBT metadata reservation */ + if (pag->pag_rmapbt_resv.ar_asked == 0) { ask = used = 0; error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; - error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used); + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used); if (error) goto out; } @@ -304,7 +305,7 @@ xfs_ag_resv_init( return error; ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + - xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= + xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <= pag->pagf_freeblks + pag->pagf_flcount); #endif out: @@ -325,8 +326,10 @@ xfs_ag_resv_alloc_extent( trace_xfs_ag_resv_alloc_extent(pag, type, args->len); switch (type) { - case XFS_AG_RESV_METADATA: case XFS_AG_RESV_AGFL: + return; + case XFS_AG_RESV_METADATA: + case XFS_AG_RESV_RMAPBT: resv = xfs_perag_resv(pag, type); break; default: @@ -341,7 +344,7 @@ xfs_ag_resv_alloc_extent( len = min_t(xfs_extlen_t, args->len, resv->ar_reserved); resv->ar_reserved -= len; - if (type == XFS_AG_RESV_AGFL) + if (type == XFS_AG_RESV_RMAPBT) return; /* Allocations of reserved blocks only need on-disk sb updates... */ xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len); @@ -365,8 +368,10 @@ xfs_ag_resv_free_extent( trace_xfs_ag_resv_free_extent(pag, type, len); switch (type) { - case XFS_AG_RESV_METADATA: case XFS_AG_RESV_AGFL: + return; + case XFS_AG_RESV_METADATA: + case XFS_AG_RESV_RMAPBT: resv = xfs_perag_resv(pag, type); break; default: @@ -379,7 +384,7 @@ xfs_ag_resv_free_extent( leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved); resv->ar_reserved += leftover; - if (type == XFS_AG_RESV_AGFL) + if (type == XFS_AG_RESV_RMAPBT) return; /* Freeing into the reserved pool only requires on-disk update... */ xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len); diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h index 8d6c687deef3..938f2f96c5e8 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.h +++ b/fs/xfs/libxfs/xfs_ag_resv.h @@ -32,4 +32,35 @@ void xfs_ag_resv_alloc_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type, void xfs_ag_resv_free_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type, struct xfs_trans *tp, xfs_extlen_t len); +/* + * RMAPBT reservation accounting wrappers. Since rmapbt blocks are sourced from + * the AGFL, they are allocated one at a time and the reservation updates don't + * require a transaction. + */ +static inline void +xfs_ag_resv_rmapbt_alloc( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + struct xfs_alloc_arg args = {0}; + struct xfs_perag *pag; + + args.len = 1; + pag = xfs_perag_get(mp, agno); + xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args); + xfs_perag_put(pag); +} + +static inline void +xfs_ag_resv_rmapbt_free( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, agno); + xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1); + xfs_perag_put(pag); +} + #endif /* __XFS_AG_RESV_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index c02781a4c091..39387bdd225d 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -53,6 +53,23 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); +/* + * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in + * the beginning of the block for a proper header with the location information + * and CRC. + */ +unsigned int +xfs_agfl_size( + struct xfs_mount *mp) +{ + unsigned int size = mp->m_sb.sb_sectsize; + + if (xfs_sb_version_hascrc(&mp->m_sb)) + size -= sizeof(struct xfs_agfl); + + return size / sizeof(xfs_agblock_t); +} + unsigned int xfs_refc_block( struct xfs_mount *mp) @@ -550,7 +567,7 @@ xfs_agfl_verify( if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) return __this_address; - for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { + for (i = 0; i < xfs_agfl_size(mp); i++) { if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) return __this_address; @@ -1564,7 +1581,6 @@ xfs_alloc_ag_vextent_small( int *stat) /* status: 0-freelist, 1-normal/none */ { struct xfs_owner_info oinfo; - struct xfs_perag *pag; int error; xfs_agblock_t fbno; xfs_extlen_t flen; @@ -1616,18 +1632,13 @@ xfs_alloc_ag_vextent_small( /* * If we're feeding an AGFL block to something that * doesn't live in the free space, we need to clear - * out the OWN_AG rmap and add the block back to - * the AGFL per-AG reservation. + * out the OWN_AG rmap. */ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1, &oinfo); if (error) goto error0; - pag = xfs_perag_get(args->mp, args->agno); - xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL, - args->tp, 1); - xfs_perag_put(pag); *stat = 0; return 0; @@ -1911,14 +1922,12 @@ xfs_free_ag_extent( XFS_STATS_INC(mp, xs_freex); XFS_STATS_ADD(mp, xs_freeb, len); - trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL, - haveleft, haveright); + trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright); return 0; error0: - trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL, - -1, -1); + trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1); if (bno_cur) xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); if (cnt_cur) @@ -2053,6 +2062,93 @@ xfs_alloc_space_available( return true; } +/* + * Check the agfl fields of the agf for inconsistency or corruption. The purpose + * is to detect an agfl header padding mismatch between current and early v5 + * kernels. This problem manifests as a 1-slot size difference between the + * on-disk flcount and the active [first, last] range of a wrapped agfl. This + * may also catch variants of agfl count corruption unrelated to padding. Either + * way, we'll reset the agfl and warn the user. + * + * Return true if a reset is required before the agfl can be used, false + * otherwise. + */ +static bool +xfs_agfl_needs_reset( + struct xfs_mount *mp, + struct xfs_agf *agf) +{ + uint32_t f = be32_to_cpu(agf->agf_flfirst); + uint32_t l = be32_to_cpu(agf->agf_fllast); + uint32_t c = be32_to_cpu(agf->agf_flcount); + int agfl_size = xfs_agfl_size(mp); + int active; + + /* no agfl header on v4 supers */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return false; + + /* + * The agf read verifier catches severe corruption of these fields. + * Repeat some sanity checks to cover a packed -> unpacked mismatch if + * the verifier allows it. + */ + if (f >= agfl_size || l >= agfl_size) + return true; + if (c > agfl_size) + return true; + + /* + * Check consistency between the on-disk count and the active range. An + * agfl padding mismatch manifests as an inconsistent flcount. + */ + if (c && l >= f) + active = l - f + 1; + else if (c) + active = agfl_size - f + l + 1; + else + active = 0; + + return active != c; +} + +/* + * Reset the agfl to an empty state. Ignore/drop any existing blocks since the + * agfl content cannot be trusted. Warn the user that a repair is required to + * recover leaked blocks. + * + * The purpose of this mechanism is to handle filesystems affected by the agfl + * header padding mismatch problem. A reset keeps the filesystem online with a + * relatively minor free space accounting inconsistency rather than suffer the + * inevitable crash from use of an invalid agfl block. + */ +static void +xfs_agfl_reset( + struct xfs_trans *tp, + struct xfs_buf *agbp, + struct xfs_perag *pag) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + + ASSERT(pag->pagf_agflreset); + trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_); + + xfs_warn(mp, + "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. " + "Please unmount and run xfs_repair.", + pag->pag_agno, pag->pagf_flcount); + + agf->agf_flfirst = 0; + agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1); + agf->agf_flcount = 0; + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST | + XFS_AGF_FLCOUNT); + + pag->pagf_flcount = 0; + pag->pagf_agflreset = false; +} + /* * Decide whether to use this allocation group for this allocation. * If so, fix up the btree freelist's size. @@ -2114,6 +2210,10 @@ xfs_alloc_fix_freelist( } } + /* reset a padding mismatched agfl before final free space check */ + if (pag->pagf_agflreset) + xfs_agfl_reset(tp, agbp, pag); + /* If there isn't enough total space or single-extent, reject it. */ need = xfs_alloc_min_freelist(mp, pag); if (!xfs_alloc_space_available(args, need, flags)) @@ -2266,10 +2366,11 @@ xfs_alloc_get_freelist( bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); be32_add_cpu(&agf->agf_flfirst, 1); xfs_trans_brelse(tp, agflbp); - if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) + if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp)) agf->agf_flfirst = 0; pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); + ASSERT(!pag->pagf_agflreset); be32_add_cpu(&agf->agf_flcount, -1); xfs_trans_agflist_delta(tp, -1); pag->pagf_flcount--; @@ -2377,10 +2478,11 @@ xfs_alloc_put_freelist( be32_to_cpu(agf->agf_seqno), &agflbp))) return error; be32_add_cpu(&agf->agf_fllast, 1); - if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) + if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp)) agf->agf_fllast = 0; pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); + ASSERT(!pag->pagf_agflreset); be32_add_cpu(&agf->agf_flcount, 1); xfs_trans_agflist_delta(tp, 1); pag->pagf_flcount++; @@ -2395,7 +2497,7 @@ xfs_alloc_put_freelist( xfs_alloc_log_agf(tp, agbp, logflags); - ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); + ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)); agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; @@ -2428,9 +2530,9 @@ xfs_agf_verify( if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && - be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) + be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) && + be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) && + be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp))) return __this_address; if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || @@ -2588,6 +2690,7 @@ xfs_alloc_read_agf( pag->pagb_count = 0; pag->pagb_tree = RB_ROOT; pag->pagf_init = 1; + pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf); } #ifdef DEBUG else if (!XFS_FORCED_SHUTDOWN(mp)) { diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 65a0cafe06e4..a311a2414a6b 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -26,6 +26,8 @@ struct xfs_trans; extern struct workqueue_struct *xfs_alloc_wq; +unsigned int xfs_agfl_size(struct xfs_mount *mp); + /* * Freespace allocation types. Argument to xfs_alloc_[v]extent. */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 6840b588187e..b451649ba176 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -74,18 +74,13 @@ xfs_allocbt_alloc_block( int error; xfs_agblock_t bno; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - /* Allocate the new block from the freelist. If we can't, give up. */ error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, &bno, 1); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } if (bno == NULLAGBLOCK) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -95,7 +90,6 @@ xfs_allocbt_alloc_block( xfs_trans_agbtree_delta(cur->bc_tp, 1); new->s = cpu_to_be32(bno); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index daae00ed30c5..3b03d886df66 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1244,8 +1244,9 @@ xfs_iread_extents( xfs_warn(ip->i_mount, "corrupt dinode %Lu, (btree extents).", (unsigned long long) ip->i_ino); - XFS_CORRUPTION_ERROR(__func__, - XFS_ERRLEVEL_LOW, ip->i_mount, block); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, + __func__, block, sizeof(*block), + __this_address); error = -EFSCORRUPTED; goto out_brelse; } @@ -1261,11 +1262,15 @@ xfs_iread_extents( */ frp = XFS_BMBT_REC_ADDR(mp, block, 1); for (j = 0; j < num_recs; j++, frp++, i++) { + xfs_failaddr_t fa; + xfs_bmbt_disk_get_all(frp, &new); - if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) { - XFS_ERROR_REPORT("xfs_bmap_read_extents(2)", - XFS_ERRLEVEL_LOW, mp); + fa = xfs_bmap_validate_extent(ip, whichfork, &new); + if (fa) { error = -EFSCORRUPTED; + xfs_inode_verifier_error(ip, error, + "xfs_iread_extents(2)", + frp, sizeof(*frp), fa); goto out_brelse; } xfs_iext_insert(ip, &icur, &new, state); @@ -6154,3 +6159,39 @@ xfs_bmap_finish_one( return error; } + +/* Check that an inode's extent does not have invalid flags or bad ranges. */ +xfs_failaddr_t +xfs_bmap_validate_extent( + struct xfs_inode *ip, + int whichfork, + struct xfs_bmbt_irec *irec) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_fsblock_t endfsb; + bool isrt; + + isrt = XFS_IS_REALTIME_INODE(ip); + endfsb = irec->br_startblock + irec->br_blockcount - 1; + if (isrt) { + if (!xfs_verify_rtbno(mp, irec->br_startblock)) + return __this_address; + if (!xfs_verify_rtbno(mp, endfsb)) + return __this_address; + } else { + if (!xfs_verify_fsbno(mp, irec->br_startblock)) + return __this_address; + if (!xfs_verify_fsbno(mp, endfsb)) + return __this_address; + if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) != + XFS_FSB_TO_AGNO(mp, endfsb)) + return __this_address; + } + if (irec->br_state != XFS_EXT_NORM) { + if (whichfork != XFS_DATA_FORK) + return __this_address; + if (!xfs_sb_version_hasextflgbit(&mp->m_sb)) + return __this_address; + } + return NULL; +} diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index e36d75799cd5..f3be6416260b 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -274,4 +274,7 @@ static inline int xfs_bmap_fork_to_state(int whichfork) } } +xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork, + struct xfs_bmbt_irec *irec); + #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 9faf479aba49..d89d06bea6e3 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -272,10 +272,10 @@ xfs_bmbt_alloc_block( cur->bc_private.b.dfops->dop_low = true; } if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } + ASSERT(args.len == 1); cur->bc_private.b.firstblock = args.fsbno; cur->bc_private.b.allocated++; @@ -286,12 +286,10 @@ xfs_bmbt_alloc_block( new->l = cpu_to_be64(args.fsbno); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index 135b8c56d23e..e4505746ccaa 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -118,18 +118,4 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); -/* - * Check that the extent does not contain an invalid unwritten extent flag. - */ -static inline bool xfs_bmbt_validate_extent(struct xfs_mount *mp, int whichfork, - struct xfs_bmbt_irec *irec) -{ - if (irec->br_state == XFS_EXT_NORM) - return true; - if (whichfork == XFS_DATA_FORK && - xfs_sb_version_hasextflgbit(&mp->m_sb)) - return true; - return false; -} - #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 79ee4a1951d1..edc0193358a5 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -1438,8 +1438,6 @@ xfs_btree_log_keys( int first, int last) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); if (bp) { xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); @@ -1450,8 +1448,6 @@ xfs_btree_log_keys( xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, xfs_ilog_fbroot(cur->bc_private.b.whichfork)); } - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); } /* @@ -1464,15 +1460,12 @@ xfs_btree_log_recs( int first, int last) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); xfs_trans_log_buf(cur->bc_tp, bp, xfs_btree_rec_offset(cur, first), xfs_btree_rec_offset(cur, last + 1) - 1); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); } /* @@ -1485,8 +1478,6 @@ xfs_btree_log_ptrs( int first, /* index of first pointer to log */ int last) /* index of last pointer to log */ { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); if (bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); @@ -1501,7 +1492,6 @@ xfs_btree_log_ptrs( xfs_ilog_fbroot(cur->bc_private.b.whichfork)); } - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); } /* @@ -1543,9 +1533,6 @@ xfs_btree_log_block( XFS_BTREE_LBLOCK_CRC_LEN }; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGBI(cur, bp, fields); - if (bp) { int nbits; @@ -1573,8 +1560,6 @@ xfs_btree_log_block( xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, xfs_ilog_fbroot(cur->bc_private.b.whichfork)); } - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); } /* @@ -1593,9 +1578,6 @@ xfs_btree_increment( int error; /* error return value */ int lev; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - ASSERT(level < cur->bc_nlevels); /* Read-ahead to the right at this level. */ @@ -1671,17 +1653,14 @@ xfs_btree_increment( cur->bc_ptrs[lev] = 1; } out1: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -1701,9 +1680,6 @@ xfs_btree_decrement( int lev; union xfs_btree_ptr ptr; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - ASSERT(level < cur->bc_nlevels); /* Read-ahead to the left at this level. */ @@ -1769,17 +1745,14 @@ xfs_btree_decrement( cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); } out1: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -1881,9 +1854,6 @@ xfs_btree_lookup( union xfs_btree_ptr *pp; /* ptr to btree block */ union xfs_btree_ptr ptr; /* ptr to btree block */ - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, dir); - XFS_BTREE_STATS_INC(cur, lookup); /* No such thing as a zero-level tree. */ @@ -1929,7 +1899,6 @@ xfs_btree_lookup( ASSERT(level == 0 && cur->bc_nlevels == 1); cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -2004,7 +1973,6 @@ xfs_btree_lookup( if (error) goto error0; XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; } @@ -2019,11 +1987,9 @@ xfs_btree_lookup( *stat = 1; else *stat = 0; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -2169,10 +2135,8 @@ __xfs_btree_updkeys( trace_xfs_btree_updkeys(cur, level, bp); #ifdef DEBUG error = xfs_btree_check_block(cur, block, level, bp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } #endif ptr = cur->bc_ptrs[level]; nlkey = xfs_btree_key_addr(cur, ptr, block); @@ -2224,9 +2188,6 @@ xfs_btree_update_keys( if (cur->bc_flags & XFS_BTREE_OVERLAPPING) return __xfs_btree_updkeys(cur, level, block, bp, false); - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGIK(cur, level, keyp); - /* * Go up the tree from this level toward the root. * At each level, update the key value to the value input. @@ -2241,10 +2202,8 @@ xfs_btree_update_keys( block = xfs_btree_get_block(cur, level, &bp); #ifdef DEBUG error = xfs_btree_check_block(cur, block, level, bp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } #endif ptr = cur->bc_ptrs[level]; kp = xfs_btree_key_addr(cur, ptr, block); @@ -2252,7 +2211,6 @@ xfs_btree_update_keys( xfs_btree_log_keys(cur, bp, ptr, ptr); } - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; } @@ -2272,9 +2230,6 @@ xfs_btree_update( int ptr; union xfs_btree_rec *rp; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGR(cur, rec); - /* Pick up the current block. */ block = xfs_btree_get_block(cur, 0, &bp); @@ -2307,11 +2262,9 @@ xfs_btree_update( goto error0; } - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -2339,9 +2292,6 @@ xfs_btree_lshift( int error; /* error return value */ int i; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && level == cur->bc_nlevels - 1) goto out0; @@ -2500,21 +2450,17 @@ xfs_btree_lshift( /* Slide the cursor value left one. */ cur->bc_ptrs[level]--; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; error1: - XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); return error; } @@ -2541,9 +2487,6 @@ xfs_btree_rshift( int error; /* error return value */ int i; /* loop counter */ - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && (level == cur->bc_nlevels - 1)) goto out0; @@ -2676,21 +2619,17 @@ xfs_btree_rshift( xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; error1: - XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); return error; } @@ -2726,9 +2665,6 @@ __xfs_btree_split( int i; #endif - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key); - XFS_BTREE_STATS_INC(cur, split); /* Set up left block (current one). */ @@ -2878,16 +2814,13 @@ __xfs_btree_split( (*curp)->bc_ptrs[level + 1]++; } *ptrp = rptr; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -2994,7 +2927,6 @@ xfs_btree_new_iroot( int i; /* loop counter */ #endif - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); XFS_BTREE_STATS_INC(cur, newroot); ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); @@ -3008,10 +2940,9 @@ xfs_btree_new_iroot( error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); if (error) goto error0; - if (*stat == 0) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + if (*stat == 0) return 0; - } + XFS_BTREE_STATS_INC(cur, alloc); /* Copy the root into a real block. */ @@ -3074,10 +3005,8 @@ xfs_btree_new_iroot( *logflags |= XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork); *stat = 1; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -3102,7 +3031,6 @@ xfs_btree_new_root( union xfs_btree_ptr rptr; union xfs_btree_ptr lptr; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); XFS_BTREE_STATS_INC(cur, newroot); /* initialise our start point from the cursor */ @@ -3202,14 +3130,11 @@ xfs_btree_new_root( xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); cur->bc_ptrs[cur->bc_nlevels] = nptr; cur->bc_nlevels++; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -3230,7 +3155,7 @@ xfs_btree_make_block_unfull( if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && level == cur->bc_nlevels - 1) { - struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_inode *ip = cur->bc_private.b.ip; if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { /* A root block that can be made bigger. */ @@ -3309,9 +3234,6 @@ xfs_btree_insrec( #endif xfs_daddr_t old_bn; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec); - ncur = NULL; lkey = &nkey; @@ -3324,14 +3246,12 @@ xfs_btree_insrec( error = xfs_btree_new_root(cur, stat); xfs_btree_set_ptr_null(cur, ptrp); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return error; } /* If we're off the left edge, return failure. */ ptr = cur->bc_ptrs[level]; if (ptr == 0) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -3489,12 +3409,10 @@ xfs_btree_insrec( *curp = ncur; } - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -3572,11 +3490,9 @@ xfs_btree_insert( } } while (!xfs_btree_ptr_is_null(cur, &nptr)); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = i; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -3611,8 +3527,6 @@ xfs_btree_kill_iroot( int i; #endif - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); ASSERT(cur->bc_nlevels > 1); @@ -3670,19 +3584,15 @@ xfs_btree_kill_iroot( #ifdef DEBUG for (i = 0; i < numrecs; i++) { error = xfs_btree_check_ptr(cur, cpp, i, level - 1); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } } #endif xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); error = xfs_btree_free_block(cur, cbp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } cur->bc_bufs[level - 1] = NULL; be16_add_cpu(&block->bb_level, -1); @@ -3690,7 +3600,6 @@ xfs_btree_kill_iroot( XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); cur->bc_nlevels--; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; } @@ -3706,7 +3615,6 @@ xfs_btree_kill_root( { int error; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); XFS_BTREE_STATS_INC(cur, killroot); /* @@ -3716,16 +3624,13 @@ xfs_btree_kill_root( cur->bc_ops->set_root(cur, newroot, -1); error = xfs_btree_free_block(cur, bp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } cur->bc_bufs[level] = NULL; cur->bc_ra[level] = 0; cur->bc_nlevels--; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; } @@ -3744,7 +3649,6 @@ xfs_btree_dec_cursor( return error; } - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; } @@ -3780,15 +3684,11 @@ xfs_btree_delrec( struct xfs_btree_cur *tcur; /* temporary btree cursor */ int numrecs; /* temporary numrec count */ - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - tcur = NULL; /* Get the index of the entry being deleted, check for nothing there. */ ptr = cur->bc_ptrs[level]; if (ptr == 0) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -3805,7 +3705,6 @@ xfs_btree_delrec( /* Fail if we're off the end of the block. */ if (ptr > numrecs) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -4080,7 +3979,7 @@ xfs_btree_delrec( tcur = NULL; if (level == 0) cur->bc_ptrs[0]++; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; return 0; } @@ -4250,13 +4149,11 @@ xfs_btree_delrec( * call updkeys directly. */ - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); /* Return value means the next level up has something to do. */ *stat = 2; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); if (tcur) xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); return error; @@ -4277,8 +4174,6 @@ xfs_btree_delete( int i; bool joined = false; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - /* * Go up the tree, starting at leaf level. * @@ -4314,11 +4209,9 @@ xfs_btree_delete( } } - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = i; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 50440b5618e8..58e30c0975c3 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -473,25 +473,6 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) #define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b)) #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) -/* - * Trace hooks. Currently not implemented as they need to be ported - * over to the generic tracing functionality, which is some effort. - * - * i,j = integer (32 bit) - * b = btree block buffer (xfs_buf_t) - * p = btree ptr - * r = btree record - * k = btree key - */ -#define XFS_BTREE_TRACE_ARGBI(c, b, i) -#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) -#define XFS_BTREE_TRACE_ARGI(c, i) -#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) -#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) -#define XFS_BTREE_TRACE_ARGIK(c, i, k) -#define XFS_BTREE_TRACE_ARGR(c, r) -#define XFS_BTREE_TRACE_CURSOR(c, t) - xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 388d67c5c903..989e95a53db2 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -173,7 +173,7 @@ extern void xfs_dir2_data_log_unused(struct xfs_da_args *args, extern void xfs_dir2_data_make_free(struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); -extern void xfs_dir2_data_use_free(struct xfs_da_args *args, +extern int xfs_dir2_data_use_free(struct xfs_da_args *args, struct xfs_buf *bp, struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 2da86a394bcf..875893ded514 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -451,15 +451,19 @@ xfs_dir2_block_addname( * No stale entries, will use enddup space to hold new leaf. */ if (!btp->stale) { + xfs_dir2_data_aoff_t aoff; + /* * Mark the space needed for the new leaf entry, now in use. */ - xfs_dir2_data_use_free(args, bp, enddup, - (xfs_dir2_data_aoff_t) - ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) - - sizeof(*blp)), - (xfs_dir2_data_aoff_t)sizeof(*blp), - &needlog, &needscan); + aoff = (xfs_dir2_data_aoff_t)((char *)enddup - (char *)hdr + + be16_to_cpu(enddup->length) - sizeof(*blp)); + error = xfs_dir2_data_use_free(args, bp, enddup, aoff, + (xfs_dir2_data_aoff_t)sizeof(*blp), &needlog, + &needscan); + if (error) + return error; + /* * Update the tail (entry count). */ @@ -541,9 +545,11 @@ xfs_dir2_block_addname( /* * Mark space for the data entry used. */ - xfs_dir2_data_use_free(args, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), - (xfs_dir2_data_aoff_t)len, &needlog, &needscan); + error = xfs_dir2_data_use_free(args, bp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), + (xfs_dir2_data_aoff_t)len, &needlog, &needscan); + if (error) + return error; /* * Create the new data entry. */ @@ -997,8 +1003,10 @@ xfs_dir2_leaf_to_block( /* * Use up the space at the end of the block (blp/btp). */ - xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size, - &needlog, &needscan); + error = xfs_dir2_data_use_free(args, dbp, dup, + args->geo->blksize - size, size, &needlog, &needscan); + if (error) + return error; /* * Initialize the block tail. */ @@ -1110,18 +1118,14 @@ xfs_dir2_sf_to_block( * Add block 0 to the inode. */ error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); - if (error) { - kmem_free(sfp); - return error; - } + if (error) + goto out_free; /* * Initialize the data block, then convert it to block format. */ error = xfs_dir3_data_init(args, blkno, &bp); - if (error) { - kmem_free(sfp); - return error; - } + if (error) + goto out_free; xfs_dir3_block_init(mp, tp, bp, dp); hdr = bp->b_addr; @@ -1136,8 +1140,10 @@ xfs_dir2_sf_to_block( */ dup = dp->d_ops->data_unused_p(hdr); needlog = needscan = 0; - xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i, - i, &needlog, &needscan); + error = xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i, + i, &needlog, &needscan); + if (error) + goto out_free; ASSERT(needscan == 0); /* * Fill in the tail. @@ -1150,9 +1156,11 @@ xfs_dir2_sf_to_block( /* * Remove the freespace, we'll manage it. */ - xfs_dir2_data_use_free(args, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), - be16_to_cpu(dup->length), &needlog, &needscan); + error = xfs_dir2_data_use_free(args, bp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), + be16_to_cpu(dup->length), &needlog, &needscan); + if (error) + goto out_free; /* * Create entry for . */ @@ -1256,4 +1264,7 @@ xfs_dir2_sf_to_block( xfs_dir2_block_log_tail(tp, bp); xfs_dir3_data_check(dp, bp); return 0; +out_free: + kmem_free(sfp); + return error; } diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 920279485275..cb67ec730b9b 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -932,10 +932,51 @@ xfs_dir2_data_make_free( *needscanp = needscan; } +/* Check our free data for obvious signs of corruption. */ +static inline xfs_failaddr_t +xfs_dir2_data_check_free( + struct xfs_dir2_data_hdr *hdr, + struct xfs_dir2_data_unused *dup, + xfs_dir2_data_aoff_t offset, + xfs_dir2_data_aoff_t len) +{ + if (hdr->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC) && + hdr->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC) && + hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) && + hdr->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) + return __this_address; + if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG) + return __this_address; + if (offset < (char *)dup - (char *)hdr) + return __this_address; + if (offset + len > (char *)dup + be16_to_cpu(dup->length) - (char *)hdr) + return __this_address; + if ((char *)dup - (char *)hdr != + be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))) + return __this_address; + return NULL; +} + +/* Sanity-check a new bestfree entry. */ +static inline xfs_failaddr_t +xfs_dir2_data_check_new_free( + struct xfs_dir2_data_hdr *hdr, + struct xfs_dir2_data_free *dfp, + struct xfs_dir2_data_unused *newdup) +{ + if (dfp == NULL) + return __this_address; + if (dfp->length != newdup->length) + return __this_address; + if (be16_to_cpu(dfp->offset) != (char *)newdup - (char *)hdr) + return __this_address; + return NULL; +} + /* * Take a byte range out of an existing unused space and make it un-free. */ -void +int xfs_dir2_data_use_free( struct xfs_da_args *args, struct xfs_buf *bp, @@ -947,23 +988,19 @@ xfs_dir2_data_use_free( { xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ + xfs_dir2_data_unused_t *newdup; /* new unused entry */ + xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ + struct xfs_dir2_data_free *bf; + xfs_failaddr_t fa; int matchback; /* matches end of freespace */ int matchfront; /* matches start of freespace */ int needscan; /* need to regen bestfree */ - xfs_dir2_data_unused_t *newdup; /* new unused entry */ - xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ int oldlen; /* old unused entry's length */ - struct xfs_dir2_data_free *bf; hdr = bp->b_addr; - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); - ASSERT(offset >= (char *)dup - (char *)hdr); - ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr); - ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); + fa = xfs_dir2_data_check_free(hdr, dup, offset, len); + if (fa) + goto corrupt; /* * Look up the entry in the bestfree table. */ @@ -1008,9 +1045,9 @@ xfs_dir2_data_use_free( xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); - ASSERT(dfp != NULL); - ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); + fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup); + if (fa) + goto corrupt; /* * If we got inserted at the last slot, * that means we don't know if there was a better @@ -1036,9 +1073,9 @@ xfs_dir2_data_use_free( xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); - ASSERT(dfp != NULL); - ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); + fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup); + if (fa) + goto corrupt; /* * If we got inserted at the last slot, * that means we don't know if there was a better @@ -1084,6 +1121,11 @@ xfs_dir2_data_use_free( } } *needscanp = needscan; + return 0; +corrupt: + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount, + hdr, __FILE__, __LINE__, fa); + return -EFSCORRUPTED; } /* Find the end of the entry data in a data/block format dir block. */ diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index d7e630f41f9c..50fc9c0c5e2b 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -877,9 +877,13 @@ xfs_dir2_leaf_addname( /* * Mark the initial part of our freespace in use for the new entry. */ - xfs_dir2_data_use_free(args, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, - &needlog, &needscan); + error = xfs_dir2_data_use_free(args, dbp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), + length, &needlog, &needscan); + if (error) { + xfs_trans_brelse(tp, lbp); + return error; + } /* * Initialize our new entry (at last). */ @@ -1415,7 +1419,8 @@ xfs_dir2_leaf_removename( oldbest = be16_to_cpu(bf[0].length); ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); - ASSERT(be16_to_cpu(bestsp[db]) == oldbest); + if (be16_to_cpu(bestsp[db]) != oldbest) + return -EFSCORRUPTED; /* * Mark the former data entry unused. */ diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 239d97a64296..9df096cc3c37 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -387,8 +387,9 @@ xfs_dir2_leaf_to_node( dp->d_ops->free_hdr_from_disk(&freehdr, free); leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); - ASSERT(be32_to_cpu(ltp->bestcount) <= - (uint)dp->i_d.di_size / args->geo->blksize); + if (be32_to_cpu(ltp->bestcount) > + (uint)dp->i_d.di_size / args->geo->blksize) + return -EFSCORRUPTED; /* * Copy freespace entries from the leaf block to the new block. @@ -1728,6 +1729,7 @@ xfs_dir2_node_addname_int( __be16 *bests; struct xfs_dir3_icfree_hdr freehdr; struct xfs_dir2_data_free *bf; + xfs_dir2_data_aoff_t aoff; dp = args->dp; mp = dp->i_mount; @@ -2022,9 +2024,13 @@ xfs_dir2_node_addname_int( /* * Mark the first part of the unused space, inuse for us. */ - xfs_dir2_data_use_free(args, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, - &needlog, &needscan); + aoff = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); + error = xfs_dir2_data_use_free(args, dbp, dup, aoff, length, + &needlog, &needscan); + if (error) { + xfs_trans_brelse(tp, dbp); + return error; + } /* * Fill in the new entry and log it. */ diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 1acb584fc5f7..42956d8d95ed 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -803,24 +803,13 @@ typedef struct xfs_agi { &(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \ (__be32 *)(bp)->b_addr) -/* - * Size of the AGFL. For CRC-enabled filesystes we steal a couple of - * slots in the beginning of the block for a proper header with the - * location information and CRC. - */ -#define XFS_AGFL_SIZE(mp) \ - (((mp)->m_sb.sb_sectsize - \ - (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ - sizeof(struct xfs_agfl) : 0)) / \ - sizeof(xfs_agblock_t)) - typedef struct xfs_agfl { __be32 agfl_magicnum; __be32 agfl_seqno; uuid_t agfl_uuid; __be64 agfl_lsn; __be32 agfl_crc; - __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ + __be32 agfl_bno[]; /* actually xfs_agfl_size(mp) */ } __attribute__((packed)) xfs_agfl_t; #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index af197a5f3a82..a2dd7f4a2719 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -93,8 +93,6 @@ __xfs_inobt_alloc_block( int error; /* error return value */ xfs_agblock_t sbno = be32_to_cpu(start->s); - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - memset(&args, 0, sizeof(args)); args.tp = cur->bc_tp; args.mp = cur->bc_mp; @@ -107,17 +105,14 @@ __xfs_inobt_alloc_block( args.resv = resv; error = xfs_alloc_vextent(&args); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } + if (args.fsbno == NULLFSBLOCK) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } ASSERT(args.len == 1); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno)); *stat = 1; diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 4fe17b368316..ef68b1de006a 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -93,20 +93,26 @@ xfs_inode_buf_verify( bool readahead) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_agnumber_t agno; int i; int ni; /* * Validate the magic number and version of every inode in the buffer */ + agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; for (i = 0; i < ni; i++) { int di_ok; xfs_dinode_t *dip; + xfs_agino_t unlinked_ino; dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); + unlinked_ino = be32_to_cpu(dip->di_next_unlinked); di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && - xfs_dinode_good_version(mp, dip->di_version); + xfs_dinode_good_version(mp, dip->di_version) && + (unlinked_ino == NULLAGINO || + xfs_verify_agino(mp, agno, unlinked_ino)); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP))) { if (readahead) { @@ -115,16 +121,18 @@ xfs_inode_buf_verify( return; } - xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); #ifdef DEBUG xfs_alert(mp, "bad inode magic/vsn daddr %lld #%d (magic=%x)", (unsigned long long)bp->b_bn, i, be16_to_cpu(dip->di_magic)); #endif + xfs_buf_verifier_error(bp, -EFSCORRUPTED, + __func__, dip, sizeof(*dip), + NULL); + return; } } - xfs_inobp_check(mp, bp); } @@ -564,10 +572,7 @@ xfs_iread( /* initialise the on-disk inode core */ memset(&ip->i_d, 0, sizeof(ip->i_d)); VFS_I(ip)->i_generation = prandom_u32(); - if (xfs_sb_version_hascrc(&mp->m_sb)) - ip->i_d.di_version = 3; - else - ip->i_d.di_version = 2; + ip->i_d.di_version = 3; return 0; } @@ -649,3 +654,108 @@ xfs_iread( xfs_trans_brelse(tp, bp); return error; } + +/* + * Validate di_extsize hint. + * + * The rules are documented at xfs_ioctl_setattr_check_extsize(). + * These functions must be kept in sync with each other. + */ +xfs_failaddr_t +xfs_inode_validate_extsize( + struct xfs_mount *mp, + uint32_t extsize, + uint16_t mode, + uint16_t flags) +{ + bool rt_flag; + bool hint_flag; + bool inherit_flag; + uint32_t extsize_bytes; + uint32_t blocksize_bytes; + + rt_flag = (flags & XFS_DIFLAG_REALTIME); + hint_flag = (flags & XFS_DIFLAG_EXTSIZE); + inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); + extsize_bytes = XFS_FSB_TO_B(mp, extsize); + + if (rt_flag) + blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; + else + blocksize_bytes = mp->m_sb.sb_blocksize; + + if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) + return __this_address; + + if (hint_flag && !S_ISREG(mode)) + return __this_address; + + if (inherit_flag && !S_ISDIR(mode)) + return __this_address; + + if ((hint_flag || inherit_flag) && extsize == 0) + return __this_address; + + if (!(hint_flag || inherit_flag) && extsize != 0) + return __this_address; + + if (extsize_bytes % blocksize_bytes) + return __this_address; + + if (extsize > MAXEXTLEN) + return __this_address; + + if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) + return __this_address; + + return NULL; +} + +/* + * Validate di_cowextsize hint. + * + * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). + * These functions must be kept in sync with each other. + */ +xfs_failaddr_t +xfs_inode_validate_cowextsize( + struct xfs_mount *mp, + uint32_t cowextsize, + uint16_t mode, + uint16_t flags, + uint64_t flags2) +{ + bool rt_flag; + bool hint_flag; + uint32_t cowextsize_bytes; + + rt_flag = (flags & XFS_DIFLAG_REALTIME); + hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); + cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); + + if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb)) + return __this_address; + + if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) + return __this_address; + + if (hint_flag && cowextsize == 0) + return __this_address; + + if (!hint_flag && cowextsize != 0) + return __this_address; + + if (hint_flag && rt_flag) + return __this_address; + + if (cowextsize_bytes % mp->m_sb.sb_blocksize) + return __this_address; + + if (cowextsize > MAXEXTLEN) + return __this_address; + + if (cowextsize > mp->m_sb.sb_agblocks / 2) + return __this_address; + + return NULL; +} diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 8a5e1da52d74..d9a376a78ee2 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -84,5 +84,10 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, struct xfs_dinode *dip); +xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp, + uint32_t extsize, uint16_t mode, uint16_t flags); +xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp, + uint32_t cowextsize, uint16_t mode, uint16_t flags, + uint64_t flags2); #endif /* __XFS_INODE_BUF_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 866d2861c625..701c42a28d05 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -195,8 +195,9 @@ xfs_iformat_local( "corrupt inode %Lu (bad size %d for local fork, size = %d).", (unsigned long long) ip->i_ino, size, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); - XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, + "xfs_iformat_local", dip, sizeof(*dip), + __this_address); return -EFSCORRUPTED; } @@ -231,8 +232,9 @@ xfs_iformat_extents( if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) { xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", (unsigned long long) ip->i_ino, nex); - XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, - mp, dip); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, + "xfs_iformat_extents(1)", dip, sizeof(*dip), + __this_address); return -EFSCORRUPTED; } @@ -245,10 +247,14 @@ xfs_iformat_extents( xfs_iext_first(ifp, &icur); for (i = 0; i < nex; i++, dp++) { + xfs_failaddr_t fa; + xfs_bmbt_disk_get_all(dp, &new); - if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) { - XFS_ERROR_REPORT("xfs_iformat_extents(2)", - XFS_ERRLEVEL_LOW, mp); + fa = xfs_bmap_validate_extent(ip, whichfork, &new); + if (fa) { + xfs_inode_verifier_error(ip, -EFSCORRUPTED, + "xfs_iformat_extents(2)", + dp, sizeof(*dp), fa); return -EFSCORRUPTED; } @@ -305,8 +311,9 @@ xfs_iformat_btree( level == 0 || level > XFS_BTREE_MAXLEVELS) { xfs_warn(mp, "corrupt inode %Lu (btree).", (unsigned long long) ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, - mp, dip); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, + "xfs_iformat_btree", dfp, size, + __this_address); return -EFSCORRUPTED; } @@ -595,7 +602,7 @@ xfs_iextents_copy( for_each_xfs_iext(ifp, &icur, &rec) { if (isnullstartblock(rec.br_startblock)) continue; - ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, &rec)); + ASSERT(xfs_bmap_validate_extent(ip, whichfork, &rec) == NULL); xfs_bmbt_disk_set_all(dp, &rec); trace_xfs_write_extent(ip, &icur, state, _RET_IP_); copied += sizeof(struct xfs_bmbt_rec); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 8479769e470d..265fdcefcbae 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -79,8 +79,6 @@ xfs_refcountbt_alloc_block( struct xfs_alloc_arg args; /* block allocation args */ int error; /* error return value */ - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - memset(&args, 0, sizeof(args)); args.tp = cur->bc_tp; args.mp = cur->bc_mp; @@ -98,7 +96,6 @@ xfs_refcountbt_alloc_block( trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno, args.agbno, 1); if (args.fsbno == NULLFSBLOCK) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -109,12 +106,10 @@ xfs_refcountbt_alloc_block( be32_add_cpu(&agf->agf_refcount_blocks, 1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; out_error: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index e829c3e489ea..8b0d0de1cd11 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -104,20 +104,15 @@ xfs_rmapbt_alloc_block( int error; xfs_agblock_t bno; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - /* Allocate the new block from the freelist. If we can't, give up. */ error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, &bno, 1); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno, bno, 1); if (bno == NULLAGBLOCK) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -130,7 +125,8 @@ xfs_rmapbt_alloc_block( be32_add_cpu(&agf->agf_rmap_blocks, 1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + xfs_ag_resv_rmapbt_alloc(cur->bc_mp, cur->bc_private.a.agno); + *stat = 1; return 0; } @@ -158,6 +154,8 @@ xfs_rmapbt_free_block( XFS_EXTENT_BUSY_SKIP_DISCARD); xfs_trans_agbtree_delta(cur->bc_tp, -1); + xfs_ag_resv_rmapbt_free(cur->bc_mp, cur->bc_private.a.agno); + return 0; } diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index a55f7a45fa78..53433cc024fd 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -731,7 +731,6 @@ xfs_sb_mount_common( struct xfs_sb *sbp) { mp->m_agfrotor = mp->m_agirotor = 0; - spin_lock_init(&mp->m_agirotor_lock); mp->m_maxagi = mp->m_sb.sb_agcount; mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 05c66e05ae20..018aabbd9394 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -80,7 +80,7 @@ xfs_scrub_walk_agfl( } /* first to the end */ - for (i = flfirst; i < XFS_AGFL_SIZE(mp); i++) { + for (i = flfirst; i < xfs_agfl_size(mp); i++) { error = fn(sc, be32_to_cpu(agfl_bno[i]), priv); if (error) return error; @@ -664,7 +664,7 @@ xfs_scrub_agf( if (agfl_last > agfl_first) fl_count = agfl_last - agfl_first + 1; else - fl_count = XFS_AGFL_SIZE(mp) - agfl_first + agfl_last + 1; + fl_count = xfs_agfl_size(mp) - agfl_first + agfl_last + 1; if (agfl_count != 0 && fl_count != agfl_count) xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); @@ -791,7 +791,7 @@ xfs_scrub_agfl( /* Allocate buffer to ensure uniqueness of AGFL entries. */ agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); agflcount = be32_to_cpu(agf->agf_flcount); - if (agflcount > XFS_AGFL_SIZE(sc->mp)) { + if (agflcount > xfs_agfl_size(sc->mp)) { xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); goto out; } diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 4ed80474f545..127575f0abfb 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -98,7 +98,7 @@ xfs_scrub_xattr_listent( if (flags & XFS_ATTR_INCOMPLETE) { /* Incomplete attr key, just mark the inode for preening. */ - xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino, NULL); + xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino); return; } diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index d00282130492..639d14b51e90 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -37,6 +37,7 @@ #include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" #include "xfs_rmap.h" +#include "xfs_rmap_btree.h" #include "xfs_refcount.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" @@ -423,6 +424,169 @@ xfs_scrub_bmap_btree( return error; } +struct xfs_scrub_bmap_check_rmap_info { + struct xfs_scrub_context *sc; + int whichfork; + struct xfs_iext_cursor icur; +}; + +/* Can we find bmaps that fit this rmap? */ +STATIC int +xfs_scrub_bmap_check_rmap( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_bmbt_irec irec; + struct xfs_scrub_bmap_check_rmap_info *sbcri = priv; + struct xfs_ifork *ifp; + struct xfs_scrub_context *sc = sbcri->sc; + bool have_map; + + /* Is this even the right fork? */ + if (rec->rm_owner != sc->ip->i_ino) + return 0; + if ((sbcri->whichfork == XFS_ATTR_FORK) ^ + !!(rec->rm_flags & XFS_RMAP_ATTR_FORK)) + return 0; + if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) + return 0; + + /* Now look up the bmbt record. */ + ifp = XFS_IFORK_PTR(sc->ip, sbcri->whichfork); + if (!ifp) { + xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + rec->rm_offset); + goto out; + } + have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset, + &sbcri->icur, &irec); + if (!have_map) + xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + rec->rm_offset); + /* + * bmap extent record lengths are constrained to 2^21 blocks in length + * because of space constraints in the on-disk metadata structure. + * However, rmap extent record lengths are constrained only by AG + * length, so we have to loop through the bmbt to make sure that the + * entire rmap is covered by bmbt records. + */ + while (have_map) { + if (irec.br_startoff != rec->rm_offset) + xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + rec->rm_offset); + if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp, + cur->bc_private.a.agno, rec->rm_startblock)) + xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + rec->rm_offset); + if (irec.br_blockcount > rec->rm_blockcount) + xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + rec->rm_offset); + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + break; + rec->rm_startblock += irec.br_blockcount; + rec->rm_offset += irec.br_blockcount; + rec->rm_blockcount -= irec.br_blockcount; + if (rec->rm_blockcount == 0) + break; + have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec); + if (!have_map) + xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + rec->rm_offset); + } + +out: + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return XFS_BTREE_QUERY_RANGE_ABORT; + return 0; +} + +/* Make sure each rmap has a corresponding bmbt entry. */ +STATIC int +xfs_scrub_bmap_check_ag_rmaps( + struct xfs_scrub_context *sc, + int whichfork, + xfs_agnumber_t agno) +{ + struct xfs_scrub_bmap_check_rmap_info sbcri; + struct xfs_btree_cur *cur; + struct xfs_buf *agf; + int error; + + error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf); + if (error) + return error; + + cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, agno); + if (!cur) { + error = -ENOMEM; + goto out_agf; + } + + sbcri.sc = sc; + sbcri.whichfork = whichfork; + error = xfs_rmap_query_all(cur, xfs_scrub_bmap_check_rmap, &sbcri); + if (error == XFS_BTREE_QUERY_RANGE_ABORT) + error = 0; + + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); +out_agf: + xfs_trans_brelse(sc->tp, agf); + return error; +} + +/* Make sure each rmap has a corresponding bmbt entry. */ +STATIC int +xfs_scrub_bmap_check_rmaps( + struct xfs_scrub_context *sc, + int whichfork) +{ + loff_t size; + xfs_agnumber_t agno; + int error; + + if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb) || + whichfork == XFS_COW_FORK || + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + return 0; + + /* Don't support realtime rmap checks yet. */ + if (XFS_IS_REALTIME_INODE(sc->ip) && whichfork == XFS_DATA_FORK) + return 0; + + /* + * Only do this for complex maps that are in btree format, or for + * situations where we would seem to have a size but zero extents. + * The inode repair code can zap broken iforks, which means we have + * to flag this bmap as corrupt if there are rmaps that need to be + * reattached. + */ + switch (whichfork) { + case XFS_DATA_FORK: + size = i_size_read(VFS_I(sc->ip)); + break; + case XFS_ATTR_FORK: + size = XFS_IFORK_Q(sc->ip); + break; + default: + size = 0; + break; + } + if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE && + (size == 0 || XFS_IFORK_NEXTENTS(sc->ip, whichfork) > 0)) + return 0; + + for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) { + error = xfs_scrub_bmap_check_ag_rmaps(sc, whichfork, agno); + if (error) + return error; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + break; + } + + return 0; +} + /* * Scrub an inode fork's block mappings. * @@ -457,16 +621,16 @@ xfs_scrub_bmap( goto out; /* No CoW forks on non-reflink inodes/filesystems. */ if (!xfs_is_reflink_inode(ip)) { - xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL); + xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); goto out; } break; case XFS_ATTR_FORK: if (!ifp) - goto out; + goto out_check_rmap; if (!xfs_sb_version_hasattr(&mp->m_sb) && !xfs_sb_version_hasattr2(&mp->m_sb)) - xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL); + xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); break; default: ASSERT(whichfork == XFS_DATA_FORK); @@ -534,6 +698,10 @@ xfs_scrub_bmap( goto out; } +out_check_rmap: + error = xfs_scrub_bmap_check_rmaps(sc, whichfork); + if (!xfs_scrub_fblock_xref_process_error(sc, whichfork, 0, &error)) + goto out; out: return error; } diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 8033ab9d8f47..8ed91d5c868d 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -213,12 +213,10 @@ xfs_scrub_block_set_preen( void xfs_scrub_ino_set_preen( struct xfs_scrub_context *sc, - xfs_ino_t ino, - struct xfs_buf *bp) + xfs_ino_t ino) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; - trace_xfs_scrub_ino_preen(sc, ino, bp ? bp->b_bn : 0, - __return_address); + trace_xfs_scrub_ino_preen(sc, ino, __return_address); } /* Record a corrupt block. */ @@ -249,22 +247,20 @@ xfs_scrub_block_xref_set_corrupt( void xfs_scrub_ino_set_corrupt( struct xfs_scrub_context *sc, - xfs_ino_t ino, - struct xfs_buf *bp) + xfs_ino_t ino) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; - trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address); + trace_xfs_scrub_ino_error(sc, ino, __return_address); } /* Record a corruption while cross-referencing with an inode. */ void xfs_scrub_ino_xref_set_corrupt( struct xfs_scrub_context *sc, - xfs_ino_t ino, - struct xfs_buf *bp) + xfs_ino_t ino) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; - trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address); + trace_xfs_scrub_ino_error(sc, ino, __return_address); } /* Record corruption in a block indexed by a file fork. */ @@ -296,12 +292,10 @@ xfs_scrub_fblock_xref_set_corrupt( void xfs_scrub_ino_set_warning( struct xfs_scrub_context *sc, - xfs_ino_t ino, - struct xfs_buf *bp) + xfs_ino_t ino) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING; - trace_xfs_scrub_ino_warning(sc, ino, bp ? bp->b_bn : 0, - __return_address); + trace_xfs_scrub_ino_warning(sc, ino, __return_address); } /* Warn about a block indexed by a file fork that needs review. */ @@ -619,7 +613,7 @@ xfs_scrub_checkpoint_log( { int error; - error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL); + error = xfs_log_force(mp, XFS_LOG_SYNC); if (error) return error; xfs_ail_push_all_sync(mp->m_ail); diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index ddb65d22c76a..deaf60400981 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -63,25 +63,22 @@ bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc, void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc, struct xfs_buf *bp); -void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino, - struct xfs_buf *bp); +void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino); void xfs_scrub_block_set_corrupt(struct xfs_scrub_context *sc, struct xfs_buf *bp); -void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino, - struct xfs_buf *bp); +void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino); void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork, xfs_fileoff_t offset); void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc, struct xfs_buf *bp); -void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino, - struct xfs_buf *bp); +void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, + xfs_ino_t ino); void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc, int whichfork, xfs_fileoff_t offset); -void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino, - struct xfs_buf *bp); +void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino); void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork, xfs_fileoff_t offset); diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 50b6a26b0299..38f29806eb54 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -781,7 +781,7 @@ xfs_scrub_directory( /* Plausible size? */ if (sc->ip->i_d.di_size < xfs_dir2_sf_hdr_size(0)) { - xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL); + xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); goto out; } diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 63ab3f98430d..106ca4bd753f 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -259,7 +259,8 @@ xfs_scrub_iallocbt_check_freemask( error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, &dip, &bp, 0, 0); - if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error)) + if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, 0, + &error)) continue; /* Which inodes are free? */ @@ -433,7 +434,7 @@ xfs_scrub_iallocbt_xref_rmap_inodes( if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) return; if (blocks != inode_blocks) - xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0); + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); } /* Scrub the inode btrees for some AG. */ diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 21297bef8df1..df14930e4fc5 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -89,67 +89,21 @@ out: /* Inode core */ -/* - * Validate di_extsize hint. - * - * The rules are documented at xfs_ioctl_setattr_check_extsize(). - * These functions must be kept in sync with each other. - */ +/* Validate di_extsize hint. */ STATIC void xfs_scrub_inode_extsize( struct xfs_scrub_context *sc, - struct xfs_buf *bp, struct xfs_dinode *dip, xfs_ino_t ino, uint16_t mode, uint16_t flags) { - struct xfs_mount *mp = sc->mp; - bool rt_flag; - bool hint_flag; - bool inherit_flag; - uint32_t extsize; - uint32_t extsize_bytes; - uint32_t blocksize_bytes; + xfs_failaddr_t fa; - rt_flag = (flags & XFS_DIFLAG_REALTIME); - hint_flag = (flags & XFS_DIFLAG_EXTSIZE); - inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); - extsize = be32_to_cpu(dip->di_extsize); - extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize); - - if (rt_flag) - blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; - else - blocksize_bytes = mp->m_sb.sb_blocksize; - - if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) - goto bad; - - if (hint_flag && !S_ISREG(mode)) - goto bad; - - if (inherit_flag && !S_ISDIR(mode)) - goto bad; - - if ((hint_flag || inherit_flag) && extsize == 0) - goto bad; - - if (!(hint_flag || inherit_flag) && extsize != 0) - goto bad; - - if (extsize_bytes % blocksize_bytes) - goto bad; - - if (extsize > MAXEXTLEN) - goto bad; - - if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) - goto bad; - - return; -bad: - xfs_scrub_ino_set_corrupt(sc, ino, bp); + fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize), + mode, flags); + if (fa) + xfs_scrub_ino_set_corrupt(sc, ino); } /* @@ -161,58 +115,25 @@ bad: STATIC void xfs_scrub_inode_cowextsize( struct xfs_scrub_context *sc, - struct xfs_buf *bp, struct xfs_dinode *dip, xfs_ino_t ino, uint16_t mode, uint16_t flags, uint64_t flags2) { - struct xfs_mount *mp = sc->mp; - bool rt_flag; - bool hint_flag; - uint32_t extsize; - uint32_t extsize_bytes; + xfs_failaddr_t fa; - rt_flag = (flags & XFS_DIFLAG_REALTIME); - hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); - extsize = be32_to_cpu(dip->di_cowextsize); - extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize); - - if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb)) - goto bad; - - if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) - goto bad; - - if (hint_flag && extsize == 0) - goto bad; - - if (!hint_flag && extsize != 0) - goto bad; - - if (hint_flag && rt_flag) - goto bad; - - if (extsize_bytes % mp->m_sb.sb_blocksize) - goto bad; - - if (extsize > MAXEXTLEN) - goto bad; - - if (extsize > mp->m_sb.sb_agblocks / 2) - goto bad; - - return; -bad: - xfs_scrub_ino_set_corrupt(sc, ino, bp); + fa = xfs_inode_validate_cowextsize(sc->mp, + be32_to_cpu(dip->di_cowextsize), mode, flags, + flags2); + if (fa) + xfs_scrub_ino_set_corrupt(sc, ino); } /* Make sure the di_flags make sense for the inode. */ STATIC void xfs_scrub_inode_flags( struct xfs_scrub_context *sc, - struct xfs_buf *bp, struct xfs_dinode *dip, xfs_ino_t ino, uint16_t mode, @@ -251,14 +172,13 @@ xfs_scrub_inode_flags( return; bad: - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); } /* Make sure the di_flags2 make sense for the inode. */ STATIC void xfs_scrub_inode_flags2( struct xfs_scrub_context *sc, - struct xfs_buf *bp, struct xfs_dinode *dip, xfs_ino_t ino, uint16_t mode, @@ -295,14 +215,13 @@ xfs_scrub_inode_flags2( return; bad: - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); } /* Scrub all the ondisk inode fields. */ STATIC void xfs_scrub_dinode( struct xfs_scrub_context *sc, - struct xfs_buf *bp, struct xfs_dinode *dip, xfs_ino_t ino) { @@ -333,7 +252,7 @@ xfs_scrub_dinode( /* mode is recognized */ break; default: - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); break; } @@ -344,22 +263,22 @@ xfs_scrub_dinode( * We autoconvert v1 inodes into v2 inodes on writeout, * so just mark this inode for preening. */ - xfs_scrub_ino_set_preen(sc, ino, bp); + xfs_scrub_ino_set_preen(sc, ino); break; case 2: case 3: if (dip->di_onlink != 0) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); if (dip->di_mode == 0 && sc->ip) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); if (dip->di_projid_hi != 0 && !xfs_sb_version_hasprojid32bit(&mp->m_sb)) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); break; default: - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); return; } @@ -369,40 +288,40 @@ xfs_scrub_dinode( */ if (dip->di_uid == cpu_to_be32(-1U) || dip->di_gid == cpu_to_be32(-1U)) - xfs_scrub_ino_set_warning(sc, ino, bp); + xfs_scrub_ino_set_warning(sc, ino); /* di_format */ switch (dip->di_format) { case XFS_DINODE_FMT_DEV: if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode) && !S_ISSOCK(mode)) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); break; case XFS_DINODE_FMT_LOCAL: if (!S_ISDIR(mode) && !S_ISLNK(mode)) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); break; case XFS_DINODE_FMT_EXTENTS: if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); break; case XFS_DINODE_FMT_BTREE: if (!S_ISREG(mode) && !S_ISDIR(mode)) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); break; case XFS_DINODE_FMT_UUID: default: - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); break; } /* di_[amc]time.nsec */ if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); /* * di_size. xfs_dinode_verify checks for things that screw up @@ -411,19 +330,19 @@ xfs_scrub_dinode( */ isize = be64_to_cpu(dip->di_size); if (isize & (1ULL << 63)) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); /* Devices, fifos, and sockets must have zero size */ if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); /* Directories can't be larger than the data section size (32G) */ if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); /* Symlinks can't be larger than SYMLINK_MAXLEN */ if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); /* * Warn if the running kernel can't handle the kinds of offsets @@ -432,7 +351,7 @@ xfs_scrub_dinode( * overly large offsets, flag the inode for admin review. */ if (isize >= mp->m_super->s_maxbytes) - xfs_scrub_ino_set_warning(sc, ino, bp); + xfs_scrub_ino_set_warning(sc, ino); /* di_nblocks */ if (flags2 & XFS_DIFLAG2_REFLINK) { @@ -447,15 +366,15 @@ xfs_scrub_dinode( */ if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); } else { if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); } - xfs_scrub_inode_flags(sc, bp, dip, ino, mode, flags); + xfs_scrub_inode_flags(sc, dip, ino, mode, flags); - xfs_scrub_inode_extsize(sc, bp, dip, ino, mode, flags); + xfs_scrub_inode_extsize(sc, dip, ino, mode, flags); /* di_nextents */ nextents = be32_to_cpu(dip->di_nextents); @@ -463,31 +382,31 @@ xfs_scrub_dinode( switch (dip->di_format) { case XFS_DINODE_FMT_EXTENTS: if (nextents > fork_recs) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); break; case XFS_DINODE_FMT_BTREE: if (nextents <= fork_recs) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); break; default: if (nextents != 0) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); break; } /* di_forkoff */ if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); if (dip->di_anextents != 0 && dip->di_forkoff == 0) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); /* di_aformat */ if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && dip->di_aformat != XFS_DINODE_FMT_EXTENTS && dip->di_aformat != XFS_DINODE_FMT_BTREE) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); /* di_anextents */ nextents = be16_to_cpu(dip->di_anextents); @@ -495,92 +414,26 @@ xfs_scrub_dinode( switch (dip->di_aformat) { case XFS_DINODE_FMT_EXTENTS: if (nextents > fork_recs) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); break; case XFS_DINODE_FMT_BTREE: if (nextents <= fork_recs) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); break; default: if (nextents != 0) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); } if (dip->di_version >= 3) { if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC) - xfs_scrub_ino_set_corrupt(sc, ino, bp); - xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2); - xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags, + xfs_scrub_ino_set_corrupt(sc, ino); + xfs_scrub_inode_flags2(sc, dip, ino, mode, flags, flags2); + xfs_scrub_inode_cowextsize(sc, dip, ino, mode, flags, flags2); } } -/* Map and read a raw inode. */ -STATIC int -xfs_scrub_inode_map_raw( - struct xfs_scrub_context *sc, - xfs_ino_t ino, - struct xfs_buf **bpp, - struct xfs_dinode **dipp) -{ - struct xfs_imap imap; - struct xfs_mount *mp = sc->mp; - struct xfs_buf *bp = NULL; - struct xfs_dinode *dip; - int error; - - error = xfs_imap(mp, sc->tp, ino, &imap, XFS_IGET_UNTRUSTED); - if (error == -EINVAL) { - /* - * Inode could have gotten deleted out from under us; - * just forget about it. - */ - error = -ENOENT; - goto out; - } - if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), - XFS_INO_TO_AGBNO(mp, ino), &error)) - goto out; - - error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, - imap.im_blkno, imap.im_len, XBF_UNMAPPED, &bp, - NULL); - if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), - XFS_INO_TO_AGBNO(mp, ino), &error)) - goto out; - - /* - * Is this really an inode? We disabled verifiers in the above - * xfs_trans_read_buf call because the inode buffer verifier - * fails on /any/ inode record in the inode cluster with a bad - * magic or version number, not just the one that we're - * checking. Therefore, grab the buffer unconditionally, attach - * the inode verifiers by hand, and run the inode verifier only - * on the one inode we want. - */ - bp->b_ops = &xfs_inode_buf_ops; - dip = xfs_buf_offset(bp, imap.im_boffset); - if (xfs_dinode_verify(mp, ino, dip) != NULL || - !xfs_dinode_good_version(mp, dip->di_version)) { - xfs_scrub_ino_set_corrupt(sc, ino, bp); - goto out_buf; - } - - /* ...and is it the one we asked for? */ - if (be32_to_cpu(dip->di_gen) != sc->sm->sm_gen) { - error = -ENOENT; - goto out_buf; - } - - *dipp = dip; - *bpp = bp; -out: - return error; -out_buf: - xfs_trans_brelse(sc->tp, bp); - return error; -} - /* * Make sure the finobt doesn't think this inode is free. * We don't have to check the inobt ourselves because we got the inode via @@ -645,18 +498,18 @@ xfs_scrub_inode_xref_bmap( if (!xfs_scrub_should_check_xref(sc, &error, NULL)) return; if (nextents < be32_to_cpu(dip->di_nextents)) - xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, &nextents, &acount); if (!xfs_scrub_should_check_xref(sc, &error, NULL)) return; if (nextents != be16_to_cpu(dip->di_anextents)) - xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); /* Check nblocks against the inode. */ if (count + acount != be64_to_cpu(dip->di_nblocks)) - xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); } /* Cross-reference with the other btrees. */ @@ -700,8 +553,7 @@ xfs_scrub_inode_xref( static void xfs_scrub_inode_check_reflink_iflag( struct xfs_scrub_context *sc, - xfs_ino_t ino, - struct xfs_buf *bp) + xfs_ino_t ino) { struct xfs_mount *mp = sc->mp; bool has_shared; @@ -716,9 +568,9 @@ xfs_scrub_inode_check_reflink_iflag( XFS_INO_TO_AGBNO(mp, ino), &error)) return; if (xfs_is_reflink_inode(sc->ip) && !has_shared) - xfs_scrub_ino_set_preen(sc, ino, bp); + xfs_scrub_ino_set_preen(sc, ino); else if (!xfs_is_reflink_inode(sc->ip) && has_shared) - xfs_scrub_ino_set_corrupt(sc, ino, bp); + xfs_scrub_ino_set_corrupt(sc, ino); } /* Scrub an inode. */ @@ -727,43 +579,33 @@ xfs_scrub_inode( struct xfs_scrub_context *sc) { struct xfs_dinode di; - struct xfs_buf *bp = NULL; - struct xfs_dinode *dip; - xfs_ino_t ino; int error = 0; - /* Did we get the in-core inode, or are we doing this manually? */ - if (sc->ip) { - ino = sc->ip->i_ino; - xfs_inode_to_disk(sc->ip, &di, 0); - dip = &di; - } else { - /* Map & read inode. */ - ino = sc->sm->sm_ino; - error = xfs_scrub_inode_map_raw(sc, ino, &bp, &dip); - if (error || !bp) - goto out; + /* + * If sc->ip is NULL, that means that the setup function called + * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED + * and a NULL inode, so flag the corruption error and return. + */ + if (!sc->ip) { + xfs_scrub_ino_set_corrupt(sc, sc->sm->sm_ino); + return 0; } - xfs_scrub_dinode(sc, bp, dip, ino); + /* Scrub the inode core. */ + xfs_inode_to_disk(sc->ip, &di, 0); + xfs_scrub_dinode(sc, &di, sc->ip->i_ino); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out; - /* Now let's do the things that require a live inode. */ - if (!sc->ip) - goto out; - /* * Look for discrepancies between file's data blocks and the reflink * iflag. We already checked the iflag against the file mode when * we scrubbed the dinode. */ if (S_ISREG(VFS_I(sc->ip)->i_mode)) - xfs_scrub_inode_check_reflink_iflag(sc, ino, bp); + xfs_scrub_inode_check_reflink_iflag(sc, sc->ip->i_ino); - xfs_scrub_inode_xref(sc, ino, dip); + xfs_scrub_inode_xref(sc, sc->ip->i_ino, &di); out: - if (bp) - xfs_trans_brelse(sc->tp, bp); return error; } diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 0d3851410c74..1fb88c18d455 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -167,8 +167,18 @@ xfs_scrub_parent_validate( * if the parent pointer erroneously points to a file, we * can't use DONTCACHE here because DONTCACHE inodes can trigger * immediate inactive cleanup of the inode. + * + * If _iget returns -EINVAL then the parent inode number is garbage + * and the directory is corrupt. If the _iget returns -EFSCORRUPTED + * or -EFSBADCRC then the parent is corrupt which is a cross + * referencing error. Any other error is an operational error. */ - error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp); + error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp); + if (error == -EINVAL) { + error = -EFSCORRUPTED; + xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); + goto out; + } if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out; if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 51daa4ae2627..6ba465e6c885 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -219,7 +219,7 @@ xfs_scrub_quota( /* Look for problem extents. */ xfs_ilock(ip, XFS_ILOCK_EXCL); if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) { - xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL); + xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); goto out_unlock_inode; } max_dqid_off = ((xfs_dqid_t)-1) / qi->qi_dqperchunk; diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 26390991369a..39c41dfe08ee 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -116,8 +116,7 @@ xfs_scrub_xref_is_used_rt_space( if (!xfs_scrub_should_check_xref(sc, &error, NULL)) goto out_unlock; if (is_free) - xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino, - NULL); + xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino); out_unlock: xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); } diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 4dc896852bf0..5d2b1c241be5 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -174,53 +174,32 @@ DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_error); DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_preen); DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class, - TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, xfs_daddr_t daddr, - void *ret_ip), - TP_ARGS(sc, ino, daddr, ret_ip), + TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, void *ret_ip), + TP_ARGS(sc, ino, ret_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(unsigned int, type) - __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, bno) __field(void *, ret_ip) ), TP_fast_assign( - xfs_fsblock_t fsbno; - xfs_agnumber_t agno; - xfs_agblock_t bno; - - if (daddr) { - fsbno = XFS_DADDR_TO_FSB(sc->mp, daddr); - agno = XFS_FSB_TO_AGNO(sc->mp, fsbno); - bno = XFS_FSB_TO_AGBNO(sc->mp, fsbno); - } else { - agno = XFS_INO_TO_AGNO(sc->mp, ino); - bno = XFS_AGINO_TO_AGBNO(sc->mp, - XFS_INO_TO_AGINO(sc->mp, ino)); - } - __entry->dev = sc->mp->m_super->s_dev; __entry->ino = ino; __entry->type = sc->sm->sm_type; - __entry->agno = agno; - __entry->bno = bno; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino 0x%llx type %u agno %u agbno %u ret_ip %pS", + TP_printk("dev %d:%d ino 0x%llx type %u ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->type, - __entry->agno, - __entry->bno, __entry->ret_ip) ) #define DEFINE_SCRUB_INO_ERROR_EVENT(name) \ DEFINE_EVENT(xfs_scrub_ino_error_class, name, \ TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, \ - xfs_daddr_t daddr, void *ret_ip), \ - TP_ARGS(sc, ino, daddr, ret_ip)) + void *ret_ip), \ + TP_ARGS(sc, ino, ret_ip)) DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_error); DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_preen); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 9c6a830da0ee..19eadc807056 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -209,7 +209,8 @@ xfs_setfilesize_trans_alloc( struct xfs_trans *tp; int error; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, + XFS_TRANS_NOFS, &tp); if (error) return error; @@ -1330,21 +1331,20 @@ xfs_get_blocks( end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); - error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, - &imap, &nimaps, XFS_BMAPI_ENTIRE); + error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, + &nimaps, 0); if (error) goto out_unlock; - - if (nimaps) { - trace_xfs_get_blocks_found(ip, offset, size, - imap.br_state == XFS_EXT_UNWRITTEN ? - XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap); - xfs_iunlock(ip, lockmode); - } else { + if (!nimaps) { trace_xfs_get_blocks_notfound(ip, offset, size); goto out_unlock; } + trace_xfs_get_blocks_found(ip, offset, size, + imap.br_state == XFS_EXT_UNWRITTEN ? + XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap); + xfs_iunlock(ip, lockmode); + /* trim mapping down to size requested */ xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index c83f549dc17b..05dee8fdd895 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1208,18 +1208,15 @@ xfs_free_file_space( /* * Now that we've unmap all full blocks we'll have to zero out any - * partial block at the beginning and/or end. xfs_zero_range is - * smart enough to skip any holes, including those we just created, - * but we must take care not to zero beyond EOF and enlarge i_size. + * partial block at the beginning and/or end. iomap_zero_range is smart + * enough to skip any holes, including those we just created, but we + * must take care not to zero beyond EOF and enlarge i_size. */ - if (offset >= XFS_ISIZE(ip)) return 0; - if (offset + len > XFS_ISIZE(ip)) len = XFS_ISIZE(ip) - offset; - - return xfs_zero_range(ip, offset, len, NULL); + return iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops); } /* @@ -1899,17 +1896,28 @@ xfs_swap_extents( * performed with log redo items! */ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { + int w = XFS_DATA_FORK; + uint32_t ipnext = XFS_IFORK_NEXTENTS(ip, w); + uint32_t tipnext = XFS_IFORK_NEXTENTS(tip, w); + /* - * Conceptually this shouldn't affect the shape of either - * bmbt, but since we atomically move extents one by one, - * we reserve enough space to rebuild both trees. + * Conceptually this shouldn't affect the shape of either bmbt, + * but since we atomically move extents one by one, we reserve + * enough space to rebuild both trees. */ - resblks = XFS_SWAP_RMAP_SPACE_RES(mp, - XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK), - XFS_DATA_FORK) + - XFS_SWAP_RMAP_SPACE_RES(mp, - XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK), - XFS_DATA_FORK); + resblks = XFS_SWAP_RMAP_SPACE_RES(mp, ipnext, w); + resblks += XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w); + + /* + * Handle the corner case where either inode might straddle the + * btree format boundary. If so, the inode could bounce between + * btree <-> extent format on unmap -> remap cycles, freeing and + * allocating a bmapbt block each time. + */ + if (ipnext == (XFS_IFORK_MAXEXT(ip, w) + 1)) + resblks += XFS_IFORK_MAXEXT(ip, w); + if (tipnext == (XFS_IFORK_MAXEXT(tip, w) + 1)) + resblks += XFS_IFORK_MAXEXT(tip, w); } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); if (error) @@ -2003,11 +2011,11 @@ xfs_swap_extents( ip->i_cowfp = tip->i_cowfp; tip->i_cowfp = cowfp; - if (ip->i_cowfp && ip->i_cnextents) + if (ip->i_cowfp && ip->i_cowfp->if_bytes) xfs_inode_set_cowblocks_tag(ip); else xfs_inode_clear_cowblocks_tag(ip); - if (tip->i_cowfp && tip->i_cnextents) + if (tip->i_cowfp && tip->i_cowfp->if_bytes) xfs_inode_set_cowblocks_tag(tip); else xfs_inode_clear_cowblocks_tag(tip); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d1da2ee9e6db..ac669a10c62f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1708,7 +1708,7 @@ xfs_buftarg_isolate( * zero. If the value is already zero, we need to reclaim the * buffer, otherwise it gets another trip through the LRU. */ - if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { + if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) { spin_unlock(&bp->b_lock); return LRU_ROTATE; } diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 270ddb4d2313..82ad270e390e 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -460,7 +460,7 @@ xfs_buf_item_unpin( list_del_init(&bp->b_li_list); bp->b_iodone = NULL; } else { - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR); xfs_buf_item_relse(bp); ASSERT(bp->b_log_item == NULL); @@ -1057,12 +1057,12 @@ xfs_buf_do_callbacks_fail( lip = list_first_entry(&bp->b_li_list, struct xfs_log_item, li_bio_list); ailp = lip->li_ailp; - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { if (lip->li_ops->iop_error) lip->li_ops->iop_error(lip, bp); } - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); } static bool @@ -1226,7 +1226,7 @@ xfs_buf_iodone( * * Either way, AIL is useless if we're forcing a shutdown. */ - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); xfs_buf_item_free(BUF_ITEM(lip)); } @@ -1246,7 +1246,7 @@ xfs_buf_resubmit_failed_buffers( /* * Clear XFS_LI_FAILED flag from all items before resubmit * - * XFS_LI_FAILED set/clear is protected by xa_lock, caller this + * XFS_LI_FAILED set/clear is protected by ail_lock, caller this * function already have it acquired */ list_for_each_entry(lip, &bp->b_li_list, li_bio_list) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 43572f8a1b8e..a7daef9e16bf 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -394,8 +394,6 @@ xfs_qm_dqalloc( error1: xfs_defer_cancel(&dfops); error0: - xfs_iunlock(quotip, XFS_ILOCK_EXCL); - return error; } @@ -920,7 +918,7 @@ xfs_qm_dqflush_done( (lip->li_flags & XFS_LI_FAILED))) { /* xfs_trans_ail_delete() drops the AIL lock. */ - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); if (lip->li_lsn == qip->qli_flush_lsn) { xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); } else { @@ -930,7 +928,7 @@ xfs_qm_dqflush_done( */ if (lip->li_flags & XFS_LI_FAILED) xfs_clear_li_failed(lip); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); } } diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 96eaa6933709..4b331e354da7 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -157,8 +157,9 @@ xfs_dquot_item_error( STATIC uint xfs_qm_dquot_logitem_push( struct xfs_log_item *lip, - struct list_head *buffer_list) __releases(&lip->li_ailp->xa_lock) - __acquires(&lip->li_ailp->xa_lock) + struct list_head *buffer_list) + __releases(&lip->li_ailp->ail_lock) + __acquires(&lip->li_ailp->ail_lock) { struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; struct xfs_buf *bp = lip->li_buf; @@ -205,7 +206,7 @@ xfs_qm_dquot_logitem_push( goto out_unlock; } - spin_unlock(&lip->li_ailp->xa_lock); + spin_unlock(&lip->li_ailp->ail_lock); error = xfs_qm_dqflush(dqp, &bp); if (error) { @@ -217,7 +218,7 @@ xfs_qm_dquot_logitem_push( xfs_buf_relse(bp); } - spin_lock(&lip->li_ailp->xa_lock); + spin_lock(&lip->li_ailp->ail_lock); out_unlock: xfs_dqunlock(dqp); return rval; @@ -400,7 +401,7 @@ xfs_qm_qoffend_logitem_committed( * Delete the qoff-start logitem from the AIL. * xfs_trans_ail_delete() drops the AIL lock. */ - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR); kmem_free(qfs->qql_item.li_lv_shadow); diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index ccf520f0b00d..a63f5083f497 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -342,6 +342,43 @@ xfs_corruption_error( xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); } +/* + * Warnings specifically for verifier errors. Differentiate CRC vs. invalid + * values, and omit the stack trace unless the error level is tuned high. + */ +void +xfs_buf_verifier_error( + struct xfs_buf *bp, + int error, + const char *name, + void *buf, + size_t bufsz, + xfs_failaddr_t failaddr) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; + int sz; + + fa = failaddr ? failaddr : __return_address; + __xfs_buf_ioerror(bp, error, fa); + + xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx %s", + bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", + fa, bp->b_ops->name, bp->b_bn, name); + + xfs_alert(mp, "Unmount and run xfs_repair"); + + if (xfs_error_level >= XFS_ERRLEVEL_LOW) { + sz = min_t(size_t, XFS_CORRUPTION_DUMP_LEN, bufsz); + xfs_alert(mp, "First %d bytes of corrupted metadata buffer:", + sz); + xfs_hex_dump(buf, sz); + } + + if (xfs_error_level >= XFS_ERRLEVEL_HIGH) + xfs_stack_trace(); +} + /* * Warnings specifically for verifier errors. Differentiate CRC vs. invalid * values, and omit the stack trace unless the error level is tuned high. @@ -352,26 +389,8 @@ xfs_verifier_error( int error, xfs_failaddr_t failaddr) { - struct xfs_mount *mp = bp->b_target->bt_mount; - xfs_failaddr_t fa; - - fa = failaddr ? failaddr : __return_address; - __xfs_buf_ioerror(bp, error, fa); - - xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx", - bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", - fa, bp->b_ops->name, bp->b_bn); - - xfs_alert(mp, "Unmount and run xfs_repair"); - - if (xfs_error_level >= XFS_ERRLEVEL_LOW) { - xfs_alert(mp, "First %d bytes of corrupted metadata buffer:", - XFS_CORRUPTION_DUMP_LEN); - xfs_hex_dump(xfs_buf_offset(bp, 0), XFS_CORRUPTION_DUMP_LEN); - } - - if (xfs_error_level >= XFS_ERRLEVEL_HIGH) - xfs_stack_trace(); + return xfs_buf_verifier_error(bp, error, "", xfs_buf_offset(bp, 0), + XFS_CORRUPTION_DUMP_LEN, failaddr); } /* diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 7e728c5a46b8..ce391349e78b 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -26,6 +26,9 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, extern void xfs_corruption_error(const char *tag, int level, struct xfs_mount *mp, void *p, const char *filename, int linenum, xfs_failaddr_t failaddr); +extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error, + const char *name, void *buf, size_t bufsz, + xfs_failaddr_t failaddr); extern void xfs_verifier_error(struct xfs_buf *bp, int error, xfs_failaddr_t failaddr); extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error, diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index fe1bfee35898..761f3189eff2 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -237,7 +237,7 @@ xfs_fs_nfs_commit_metadata( if (!lsn) return 0; - return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); + return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); } const struct export_operations xfs_export_operations = { diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index 77760dbf0242..13e3d1a69e76 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -611,10 +611,9 @@ xfs_extent_busy_flush( unsigned busy_gen) { DEFINE_WAIT (wait); - int log_flushed = 0, error; + int error; - trace_xfs_log_force(mp, 0, _THIS_IP_); - error = _xfs_log_force(mp, XFS_LOG_SYNC, &log_flushed); + error = xfs_log_force(mp, XFS_LOG_SYNC); if (error) return; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 9ea08326f876..299aee4b7b0b 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -48,20 +48,6 @@ static const struct vm_operations_struct xfs_file_vm_ops; -/* - * Clear the specified ranges to zero through either the pagecache or DAX. - * Holes and unwritten extents will be left as-is as they already are zeroed. - */ -int -xfs_zero_range( - struct xfs_inode *ip, - xfs_off_t pos, - xfs_off_t count, - bool *did_zero) -{ - return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops); -} - int xfs_update_prealloc_flags( struct xfs_inode *ip, @@ -122,7 +108,7 @@ xfs_dir_fsync( if (!lsn) return 0; - return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); + return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); } STATIC int @@ -182,7 +168,7 @@ xfs_file_fsync( } if (lsn) { - error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); + error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); ip->i_itemp->ili_fsync_fields = 0; } xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -300,31 +286,6 @@ xfs_file_read_iter( return ret; } -/* - * Zero any on disk space between the current EOF and the new, larger EOF. - * - * This handles the normal case of zeroing the remainder of the last block in - * the file and the unusual case of zeroing blocks out beyond the size of the - * file. This second case only happens with fixed size extents and when the - * system crashes before the inode size was updated but after blocks were - * allocated. - * - * Expects the iolock to be held exclusive, and will take the ilock internally. - */ -int /* error (positive) */ -xfs_zero_eof( - struct xfs_inode *ip, - xfs_off_t offset, /* starting I/O offset */ - xfs_fsize_t isize, /* current inode size */ - bool *did_zeroing) -{ - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); - ASSERT(offset > isize); - - trace_xfs_zero_eof(ip, isize, offset - isize); - return xfs_zero_range(ip, isize, offset - isize, did_zeroing); -} - /* * Common pre-write limit and setup checks. * @@ -344,6 +305,7 @@ xfs_file_aio_write_checks( ssize_t error = 0; size_t count = iov_iter_count(from); bool drained_dio = false; + loff_t isize; restart: error = generic_write_checks(iocb, from); @@ -380,7 +342,8 @@ restart: * and hence be able to correctly determine if we need to run zeroing. */ spin_lock(&ip->i_flags_lock); - if (iocb->ki_pos > i_size_read(inode)) { + isize = i_size_read(inode); + if (iocb->ki_pos > isize) { spin_unlock(&ip->i_flags_lock); if (!drained_dio) { if (*iolock == XFS_IOLOCK_SHARED) { @@ -401,7 +364,10 @@ restart: drained_dio = true; goto restart; } - error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL); + + trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize); + error = iomap_zero_range(inode, isize, iocb->ki_pos - isize, + NULL, &xfs_iomap_ops); if (error) return error; } else diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 8b4545623e25..523792768080 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -217,7 +217,7 @@ xfs_growfs_data_private( } agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp); - for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++) + for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++) agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); error = xfs_bwrite(bp); diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index d53a316162d6..9a18f69f6e96 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -483,7 +483,28 @@ xfs_iget_cache_miss( trace_xfs_iget_miss(ip); - if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) { + + /* + * If we are allocating a new inode, then check what was returned is + * actually a free, empty inode. If we are not allocating an inode, + * the check we didn't find a free inode. + */ + if (flags & XFS_IGET_CREATE) { + if (VFS_I(ip)->i_mode != 0) { + xfs_warn(mp, +"Corruption detected! Free inode 0x%llx not marked free on disk", + ino); + error = -EFSCORRUPTED; + goto out_destroy; + } + if (ip->i_d.di_nblocks != 0) { + xfs_warn(mp, +"Corruption detected! Free inode 0x%llx has blocks allocated!", + ino); + error = -EFSCORRUPTED; + goto out_destroy; + } + } else if (VFS_I(ip)->i_mode == 0) { error = -ENOENT; goto out_destroy; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 604ee384a00a..3e3aab3888fa 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1872,6 +1872,7 @@ xfs_inactive( xfs_inode_t *ip) { struct xfs_mount *mp; + struct xfs_ifork *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); int error; int truncate = 0; @@ -1892,6 +1893,10 @@ xfs_inactive( if (mp->m_flags & XFS_MOUNT_RDONLY) return; + /* Try to clean out the cow blocks if there are any. */ + if (xfs_is_reflink_inode(ip) && cow_ifp->if_bytes > 0) + xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (VFS_I(ip)->i_nlink != 0) { /* * force is true because we are evicting an inode from the @@ -2470,6 +2475,10 @@ xfs_ifree( ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; + + /* Don't attempt to replay owner changes for a deleted inode */ + ip->i_itemp->ili_fields &= ~(XFS_ILOG_AOWNER|XFS_ILOG_DOWNER); + /* * Bump the generation count so no one will be confused * by reincarnations of this inode. @@ -2497,7 +2506,7 @@ xfs_iunpin( trace_xfs_inode_unpin_nowait(ip, _RET_IP_); /* Give the log a push to start the unpinning I/O */ - xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0); + xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3e8dc990d41c..132d8aa2afc4 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -443,10 +443,6 @@ enum xfs_prealloc_flags { int xfs_update_prealloc_flags(struct xfs_inode *ip, enum xfs_prealloc_flags flags); -int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, - xfs_fsize_t isize, bool *did_zeroing); -int xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count, - bool *did_zero); /* from xfs_iops.c */ extern void xfs_setup_inode(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index d5037f060d6f..34b91b789702 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -502,8 +502,8 @@ STATIC uint xfs_inode_item_push( struct xfs_log_item *lip, struct list_head *buffer_list) - __releases(&lip->li_ailp->xa_lock) - __acquires(&lip->li_ailp->xa_lock) + __releases(&lip->li_ailp->ail_lock) + __acquires(&lip->li_ailp->ail_lock) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; @@ -562,7 +562,7 @@ xfs_inode_item_push( ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); - spin_unlock(&lip->li_ailp->xa_lock); + spin_unlock(&lip->li_ailp->ail_lock); error = xfs_iflush(ip, &bp); if (!error) { @@ -571,7 +571,7 @@ xfs_inode_item_push( xfs_buf_relse(bp); } - spin_lock(&lip->li_ailp->xa_lock); + spin_lock(&lip->li_ailp->ail_lock); out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); return rval; @@ -579,9 +579,6 @@ out_unlock: /* * Unlock the inode associated with the inode log item. - * Clear the fields of the inode and inode log item that - * are specific to the current transaction. If the - * hold flags is set, do not unlock the inode. */ STATIC void xfs_inode_item_unlock( @@ -637,10 +634,6 @@ xfs_inode_item_committed( return lsn; } -/* - * XXX rcc - this one really has to do something. Probably needs - * to stamp in a new field in the incore inode. - */ STATIC void xfs_inode_item_committing( struct xfs_log_item *lip, @@ -759,7 +752,7 @@ xfs_iflush_done( bool mlip_changed = false; /* this is an opencoded batch version of xfs_trans_ail_delete */ - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); list_for_each_entry(blip, &tmp, li_bio_list) { if (INODE_ITEM(blip)->ili_logged && blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) @@ -770,15 +763,15 @@ xfs_iflush_done( } if (mlip_changed) { - if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount)) - xlog_assign_tail_lsn_locked(ailp->xa_mount); - if (list_empty(&ailp->xa_ail)) - wake_up_all(&ailp->xa_empty); + if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount)) + xlog_assign_tail_lsn_locked(ailp->ail_mount); + if (list_empty(&ailp->ail_head)) + wake_up_all(&ailp->ail_empty); } - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); if (mlip_changed) - xfs_log_space_wake(ailp->xa_mount); + xfs_log_space_wake(ailp->ail_mount); } /* diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 56475fcd76f2..e0307fbff911 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -46,6 +46,7 @@ #include #include #include +#include /* * Directories have different lock order w.r.t. mmap_sem compared to regular @@ -874,7 +875,9 @@ xfs_setattr_size( * truncate. */ if (newsize > oldsize) { - error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing); + trace_xfs_zero_eof(ip, oldsize, newsize - oldsize); + error = iomap_zero_range(inode, oldsize, newsize - oldsize, + &did_zeroing, &xfs_iomap_ops); } else { error = iomap_truncate_page(inode, newsize, &did_zeroing, &xfs_iomap_ops); @@ -1052,11 +1055,21 @@ xfs_vn_update_time( { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; + int log_flags = XFS_ILOG_TIMESTAMP; struct xfs_trans *tp; int error; trace_xfs_update_time(ip); + if (inode->i_sb->s_flags & SB_LAZYTIME) { + if (!((flags & S_VERSION) && + inode_maybe_inc_iversion(inode, false))) + return generic_update_time(inode, now, flags); + + /* Capture the iversion update that just occurred */ + log_flags |= XFS_ILOG_CORE; + } + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); if (error) return error; @@ -1070,7 +1083,7 @@ xfs_vn_update_time( inode->i_atime = *now; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); + xfs_trans_log_inode(tp, ip, log_flags); return xfs_trans_commit(tp); } diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 3e5ba1ecc080..b9c9c848146b 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -869,7 +869,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) return 0; } - error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL); + error = xfs_log_force(mp, XFS_LOG_SYNC); ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); #ifdef DEBUG @@ -1149,7 +1149,7 @@ xlog_assign_tail_lsn_locked( struct xfs_log_item *lip; xfs_lsn_t tail_lsn; - assert_spin_locked(&mp->m_ail->xa_lock); + assert_spin_locked(&mp->m_ail->ail_lock); /* * To make sure we always have a valid LSN for the log tail we keep @@ -1172,9 +1172,9 @@ xlog_assign_tail_lsn( { xfs_lsn_t tail_lsn; - spin_lock(&mp->m_ail->xa_lock); + spin_lock(&mp->m_ail->ail_lock); tail_lsn = xlog_assign_tail_lsn_locked(mp); - spin_unlock(&mp->m_ail->xa_lock); + spin_unlock(&mp->m_ail->ail_lock); return tail_lsn; } @@ -3304,129 +3304,178 @@ xlog_state_switch_iclogs( * not in the active nor dirty state. */ int -_xfs_log_force( +xfs_log_force( struct xfs_mount *mp, - uint flags, - int *log_flushed) + uint flags) { struct xlog *log = mp->m_log; struct xlog_in_core *iclog; xfs_lsn_t lsn; XFS_STATS_INC(mp, xs_log_force); + trace_xfs_log_force(mp, 0, _RET_IP_); xlog_cil_force(log); spin_lock(&log->l_icloglock); - iclog = log->l_iclog; - if (iclog->ic_state & XLOG_STATE_IOERROR) { - spin_unlock(&log->l_icloglock); - return -EIO; - } + if (iclog->ic_state & XLOG_STATE_IOERROR) + goto out_error; - /* If the head iclog is not active nor dirty, we just attach - * ourselves to the head and go to sleep. - */ - if (iclog->ic_state == XLOG_STATE_ACTIVE || - iclog->ic_state == XLOG_STATE_DIRTY) { + if (iclog->ic_state == XLOG_STATE_DIRTY || + (iclog->ic_state == XLOG_STATE_ACTIVE && + atomic_read(&iclog->ic_refcnt) == 0 && iclog->ic_offset == 0)) { /* - * If the head is dirty or (active and empty), then - * we need to look at the previous iclog. If the previous - * iclog is active or dirty we are done. There is nothing - * to sync out. Otherwise, we attach ourselves to the + * If the head is dirty or (active and empty), then we need to + * look at the previous iclog. + * + * If the previous iclog is active or dirty we are done. There + * is nothing to sync out. Otherwise, we attach ourselves to the * previous iclog and go to sleep. */ - if (iclog->ic_state == XLOG_STATE_DIRTY || - (atomic_read(&iclog->ic_refcnt) == 0 - && iclog->ic_offset == 0)) { - iclog = iclog->ic_prev; - if (iclog->ic_state == XLOG_STATE_ACTIVE || - iclog->ic_state == XLOG_STATE_DIRTY) - goto no_sleep; - else - goto maybe_sleep; - } else { - if (atomic_read(&iclog->ic_refcnt) == 0) { - /* We are the only one with access to this - * iclog. Flush it out now. There should - * be a roundoff of zero to show that someone - * has already taken care of the roundoff from - * the previous sync. - */ - atomic_inc(&iclog->ic_refcnt); - lsn = be64_to_cpu(iclog->ic_header.h_lsn); - xlog_state_switch_iclogs(log, iclog, 0); - spin_unlock(&log->l_icloglock); - - if (xlog_state_release_iclog(log, iclog)) - return -EIO; - - if (log_flushed) - *log_flushed = 1; - spin_lock(&log->l_icloglock); - if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && - iclog->ic_state != XLOG_STATE_DIRTY) - goto maybe_sleep; - else - goto no_sleep; - } else { - /* Someone else is writing to this iclog. - * Use its call to flush out the data. However, - * the other thread may not force out this LR, - * so we mark it WANT_SYNC. - */ - xlog_state_switch_iclogs(log, iclog, 0); - goto maybe_sleep; - } - } - } - - /* By the time we come around again, the iclog could've been filled - * which would give it another lsn. If we have a new lsn, just - * return because the relevant data has been flushed. - */ -maybe_sleep: - if (flags & XFS_LOG_SYNC) { - /* - * We must check if we're shutting down here, before - * we wait, while we're holding the l_icloglock. - * Then we check again after waking up, in case our - * sleep was disturbed by a bad news. - */ - if (iclog->ic_state & XLOG_STATE_IOERROR) { + iclog = iclog->ic_prev; + if (iclog->ic_state == XLOG_STATE_ACTIVE || + iclog->ic_state == XLOG_STATE_DIRTY) + goto out_unlock; + } else if (iclog->ic_state == XLOG_STATE_ACTIVE) { + if (atomic_read(&iclog->ic_refcnt) == 0) { + /* + * We are the only one with access to this iclog. + * + * Flush it out now. There should be a roundoff of zero + * to show that someone has already taken care of the + * roundoff from the previous sync. + */ + atomic_inc(&iclog->ic_refcnt); + lsn = be64_to_cpu(iclog->ic_header.h_lsn); + xlog_state_switch_iclogs(log, iclog, 0); spin_unlock(&log->l_icloglock); - return -EIO; - } - XFS_STATS_INC(mp, xs_log_force_sleep); - xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); - /* - * No need to grab the log lock here since we're - * only deciding whether or not to return EIO - * and the memory read should be atomic. - */ - if (iclog->ic_state & XLOG_STATE_IOERROR) - return -EIO; - } else { -no_sleep: - spin_unlock(&log->l_icloglock); + if (xlog_state_release_iclog(log, iclog)) + return -EIO; + + spin_lock(&log->l_icloglock); + if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn || + iclog->ic_state == XLOG_STATE_DIRTY) + goto out_unlock; + } else { + /* + * Someone else is writing to this iclog. + * + * Use its call to flush out the data. However, the + * other thread may not force out this LR, so we mark + * it WANT_SYNC. + */ + xlog_state_switch_iclogs(log, iclog, 0); + } + } else { + /* + * If the head iclog is not active nor dirty, we just attach + * ourselves to the head and go to sleep if necessary. + */ + ; } + + if (!(flags & XFS_LOG_SYNC)) + goto out_unlock; + + if (iclog->ic_state & XLOG_STATE_IOERROR) + goto out_error; + XFS_STATS_INC(mp, xs_log_force_sleep); + xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); + if (iclog->ic_state & XLOG_STATE_IOERROR) + return -EIO; return 0; + +out_unlock: + spin_unlock(&log->l_icloglock); + return 0; +out_error: + spin_unlock(&log->l_icloglock); + return -EIO; } -/* - * Wrapper for _xfs_log_force(), to be used when caller doesn't care - * about errors or whether the log was flushed or not. This is the normal - * interface to use when trying to unpin items or move the log forward. - */ -void -xfs_log_force( - xfs_mount_t *mp, - uint flags) +static int +__xfs_log_force_lsn( + struct xfs_mount *mp, + xfs_lsn_t lsn, + uint flags, + int *log_flushed, + bool already_slept) { - trace_xfs_log_force(mp, 0, _RET_IP_); - _xfs_log_force(mp, flags, NULL); + struct xlog *log = mp->m_log; + struct xlog_in_core *iclog; + + spin_lock(&log->l_icloglock); + iclog = log->l_iclog; + if (iclog->ic_state & XLOG_STATE_IOERROR) + goto out_error; + + while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { + iclog = iclog->ic_next; + if (iclog == log->l_iclog) + goto out_unlock; + } + + if (iclog->ic_state == XLOG_STATE_DIRTY) + goto out_unlock; + + if (iclog->ic_state == XLOG_STATE_ACTIVE) { + /* + * We sleep here if we haven't already slept (e.g. this is the + * first time we've looked at the correct iclog buf) and the + * buffer before us is going to be sync'ed. The reason for this + * is that if we are doing sync transactions here, by waiting + * for the previous I/O to complete, we can allow a few more + * transactions into this iclog before we close it down. + * + * Otherwise, we mark the buffer WANT_SYNC, and bump up the + * refcnt so we can release the log (which drops the ref count). + * The state switch keeps new transaction commits from using + * this buffer. When the current commits finish writing into + * the buffer, the refcount will drop to zero and the buffer + * will go out then. + */ + if (!already_slept && + (iclog->ic_prev->ic_state & + (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { + ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); + + XFS_STATS_INC(mp, xs_log_force_sleep); + + xlog_wait(&iclog->ic_prev->ic_write_wait, + &log->l_icloglock); + return -EAGAIN; + } + atomic_inc(&iclog->ic_refcnt); + xlog_state_switch_iclogs(log, iclog, 0); + spin_unlock(&log->l_icloglock); + if (xlog_state_release_iclog(log, iclog)) + return -EIO; + if (log_flushed) + *log_flushed = 1; + spin_lock(&log->l_icloglock); + } + + if (!(flags & XFS_LOG_SYNC) || + (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) + goto out_unlock; + + if (iclog->ic_state & XLOG_STATE_IOERROR) + goto out_error; + + XFS_STATS_INC(mp, xs_log_force_sleep); + xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); + if (iclog->ic_state & XLOG_STATE_IOERROR) + return -EIO; + return 0; + +out_unlock: + spin_unlock(&log->l_icloglock); + return 0; +out_error: + spin_unlock(&log->l_icloglock); + return -EIO; } /* @@ -3438,135 +3487,32 @@ xfs_log_force( * state and go to sleep or return. * If it is in any other state, go to sleep or return. * - * Synchronous forces are implemented with a signal variable. All callers - * to force a given lsn to disk will wait on a the sv attached to the - * specific in-core log. When given in-core log finally completes its - * write to disk, that thread will wake up all threads waiting on the - * sv. + * Synchronous forces are implemented with a wait queue. All callers trying + * to force a given lsn to disk must wait on the queue attached to the + * specific in-core log. When given in-core log finally completes its write + * to disk, that thread will wake up all threads waiting on the queue. */ int -_xfs_log_force_lsn( +xfs_log_force_lsn( struct xfs_mount *mp, xfs_lsn_t lsn, uint flags, int *log_flushed) { - struct xlog *log = mp->m_log; - struct xlog_in_core *iclog; - int already_slept = 0; - + int ret; ASSERT(lsn != 0); XFS_STATS_INC(mp, xs_log_force); + trace_xfs_log_force(mp, lsn, _RET_IP_); - lsn = xlog_cil_force_lsn(log, lsn); + lsn = xlog_cil_force_lsn(mp->m_log, lsn); if (lsn == NULLCOMMITLSN) return 0; -try_again: - spin_lock(&log->l_icloglock); - iclog = log->l_iclog; - if (iclog->ic_state & XLOG_STATE_IOERROR) { - spin_unlock(&log->l_icloglock); - return -EIO; - } - - do { - if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { - iclog = iclog->ic_next; - continue; - } - - if (iclog->ic_state == XLOG_STATE_DIRTY) { - spin_unlock(&log->l_icloglock); - return 0; - } - - if (iclog->ic_state == XLOG_STATE_ACTIVE) { - /* - * We sleep here if we haven't already slept (e.g. - * this is the first time we've looked at the correct - * iclog buf) and the buffer before us is going to - * be sync'ed. The reason for this is that if we - * are doing sync transactions here, by waiting for - * the previous I/O to complete, we can allow a few - * more transactions into this iclog before we close - * it down. - * - * Otherwise, we mark the buffer WANT_SYNC, and bump - * up the refcnt so we can release the log (which - * drops the ref count). The state switch keeps new - * transaction commits from using this buffer. When - * the current commits finish writing into the buffer, - * the refcount will drop to zero and the buffer will - * go out then. - */ - if (!already_slept && - (iclog->ic_prev->ic_state & - (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { - ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); - - XFS_STATS_INC(mp, xs_log_force_sleep); - - xlog_wait(&iclog->ic_prev->ic_write_wait, - &log->l_icloglock); - already_slept = 1; - goto try_again; - } - atomic_inc(&iclog->ic_refcnt); - xlog_state_switch_iclogs(log, iclog, 0); - spin_unlock(&log->l_icloglock); - if (xlog_state_release_iclog(log, iclog)) - return -EIO; - if (log_flushed) - *log_flushed = 1; - spin_lock(&log->l_icloglock); - } - - if ((flags & XFS_LOG_SYNC) && /* sleep */ - !(iclog->ic_state & - (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { - /* - * Don't wait on completion if we know that we've - * gotten a log write error. - */ - if (iclog->ic_state & XLOG_STATE_IOERROR) { - spin_unlock(&log->l_icloglock); - return -EIO; - } - XFS_STATS_INC(mp, xs_log_force_sleep); - xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); - /* - * No need to grab the log lock here since we're - * only deciding whether or not to return EIO - * and the memory read should be atomic. - */ - if (iclog->ic_state & XLOG_STATE_IOERROR) - return -EIO; - } else { /* just return */ - spin_unlock(&log->l_icloglock); - } - - return 0; - } while (iclog != log->l_iclog); - - spin_unlock(&log->l_icloglock); - return 0; -} - -/* - * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care - * about errors or whether the log was flushed or not. This is the normal - * interface to use when trying to unpin items or move the log forward. - */ -void -xfs_log_force_lsn( - xfs_mount_t *mp, - xfs_lsn_t lsn, - uint flags) -{ - trace_xfs_log_force(mp, lsn, _RET_IP_); - _xfs_log_force_lsn(mp, lsn, flags, NULL); + ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false); + if (ret == -EAGAIN) + ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true); + return ret; } /* @@ -4035,7 +3981,7 @@ xfs_log_force_umount( * to guarantee this. */ if (!logerror) - _xfs_log_force(mp, XFS_LOG_SYNC, NULL); + xfs_log_force(mp, XFS_LOG_SYNC); /* * mark the filesystem and the as in a shutdown state and wake diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index bf212772595c..7e2d62922a16 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -129,18 +129,9 @@ xfs_lsn_t xfs_log_done(struct xfs_mount *mp, struct xlog_ticket *ticket, struct xlog_in_core **iclog, bool regrant); -int _xfs_log_force(struct xfs_mount *mp, - uint flags, - int *log_forced); -void xfs_log_force(struct xfs_mount *mp, - uint flags); -int _xfs_log_force_lsn(struct xfs_mount *mp, - xfs_lsn_t lsn, - uint flags, - int *log_forced); -void xfs_log_force_lsn(struct xfs_mount *mp, - xfs_lsn_t lsn, - uint flags); +int xfs_log_force(struct xfs_mount *mp, uint flags); +int xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags, + int *log_forced); int xfs_log_mount(struct xfs_mount *mp, struct xfs_buftarg *log_target, xfs_daddr_t start_block, diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 43aa42a3a5d3..cb376ac8a595 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -202,7 +202,7 @@ xlog_cil_alloc_shadow_bufs( */ kmem_free(lip->li_lv_shadow); - lv = kmem_alloc(buf_size, KM_SLEEP|KM_NOFS); + lv = kmem_alloc_large(buf_size, KM_SLEEP | KM_NOFS); memset(lv, 0, xlog_cil_iovec_space(niovecs)); lv->lv_item = lip; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 00240c9ee72e..2b2383f1895e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3173,13 +3173,6 @@ xlog_recover_inode_pass2( /* recover the log dinode inode into the on disk inode */ xfs_log_dinode_to_disk(ldip, dip); - /* the rest is in on-disk format */ - if (item->ri_buf[1].i_len > isize) { - memcpy((char *)dip + isize, - item->ri_buf[1].i_addr + isize, - item->ri_buf[1].i_len - isize); - } - fields = in_f->ilf_fields; if (fields & XFS_ILOG_DEV) xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); @@ -3252,7 +3245,9 @@ xlog_recover_inode_pass2( } out_owner_change: - if (in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) + /* Recover the swapext owner change unless inode has been deleted */ + if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) && + (dip->di_mode != 0)) error = xfs_recover_inode_owner_change(mp, dip, in_f, buffer_list); /* re-generate the checksum. */ @@ -3434,7 +3429,7 @@ xlog_recover_efi_pass2( } atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); - spin_lock(&log->l_ailp->xa_lock); + spin_lock(&log->l_ailp->ail_lock); /* * The EFI has two references. One for the EFD and one for EFI to ensure * it makes it into the AIL. Insert the EFI into the AIL directly and @@ -3477,7 +3472,7 @@ xlog_recover_efd_pass2( * Search for the EFI with the id in the EFD format structure in the * AIL. */ - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); while (lip != NULL) { if (lip->li_type == XFS_LI_EFI) { @@ -3487,9 +3482,9 @@ xlog_recover_efd_pass2( * Drop the EFD reference to the EFI. This * removes the EFI from the AIL and frees it. */ - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); xfs_efi_release(efip); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); break; } } @@ -3497,7 +3492,7 @@ xlog_recover_efd_pass2( } xfs_trans_ail_cursor_done(&cur); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); return 0; } @@ -3530,7 +3525,7 @@ xlog_recover_rui_pass2( } atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents); - spin_lock(&log->l_ailp->xa_lock); + spin_lock(&log->l_ailp->ail_lock); /* * The RUI has two references. One for the RUD and one for RUI to ensure * it makes it into the AIL. Insert the RUI into the AIL directly and @@ -3570,7 +3565,7 @@ xlog_recover_rud_pass2( * Search for the RUI with the id in the RUD format structure in the * AIL. */ - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); while (lip != NULL) { if (lip->li_type == XFS_LI_RUI) { @@ -3580,9 +3575,9 @@ xlog_recover_rud_pass2( * Drop the RUD reference to the RUI. This * removes the RUI from the AIL and frees it. */ - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); xfs_rui_release(ruip); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); break; } } @@ -3590,7 +3585,7 @@ xlog_recover_rud_pass2( } xfs_trans_ail_cursor_done(&cur); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); return 0; } @@ -3646,7 +3641,7 @@ xlog_recover_cui_pass2( } atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); - spin_lock(&log->l_ailp->xa_lock); + spin_lock(&log->l_ailp->ail_lock); /* * The CUI has two references. One for the CUD and one for CUI to ensure * it makes it into the AIL. Insert the CUI into the AIL directly and @@ -3687,7 +3682,7 @@ xlog_recover_cud_pass2( * Search for the CUI with the id in the CUD format structure in the * AIL. */ - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); while (lip != NULL) { if (lip->li_type == XFS_LI_CUI) { @@ -3697,9 +3692,9 @@ xlog_recover_cud_pass2( * Drop the CUD reference to the CUI. This * removes the CUI from the AIL and frees it. */ - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); xfs_cui_release(cuip); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); break; } } @@ -3707,7 +3702,7 @@ xlog_recover_cud_pass2( } xfs_trans_ail_cursor_done(&cur); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); return 0; } @@ -3765,7 +3760,7 @@ xlog_recover_bui_pass2( } atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents); - spin_lock(&log->l_ailp->xa_lock); + spin_lock(&log->l_ailp->ail_lock); /* * The RUI has two references. One for the RUD and one for RUI to ensure * it makes it into the AIL. Insert the RUI into the AIL directly and @@ -3806,7 +3801,7 @@ xlog_recover_bud_pass2( * Search for the BUI with the id in the BUD format structure in the * AIL. */ - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); while (lip != NULL) { if (lip->li_type == XFS_LI_BUI) { @@ -3816,9 +3811,9 @@ xlog_recover_bud_pass2( * Drop the BUD reference to the BUI. This * removes the BUI from the AIL and frees it. */ - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); xfs_bui_release(buip); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); break; } } @@ -3826,7 +3821,7 @@ xlog_recover_bud_pass2( } xfs_trans_ail_cursor_done(&cur); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); return 0; } @@ -4659,9 +4654,9 @@ xlog_recover_process_efi( if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) return 0; - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); error = xfs_efi_recover(mp, efip); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); return error; } @@ -4677,9 +4672,9 @@ xlog_recover_cancel_efi( efip = container_of(lip, struct xfs_efi_log_item, efi_item); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); xfs_efi_release(efip); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); } /* Recover the RUI if necessary. */ @@ -4699,9 +4694,9 @@ xlog_recover_process_rui( if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags)) return 0; - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); error = xfs_rui_recover(mp, ruip); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); return error; } @@ -4717,9 +4712,9 @@ xlog_recover_cancel_rui( ruip = container_of(lip, struct xfs_rui_log_item, rui_item); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); xfs_rui_release(ruip); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); } /* Recover the CUI if necessary. */ @@ -4740,9 +4735,9 @@ xlog_recover_process_cui( if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)) return 0; - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); error = xfs_cui_recover(mp, cuip, dfops); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); return error; } @@ -4758,9 +4753,9 @@ xlog_recover_cancel_cui( cuip = container_of(lip, struct xfs_cui_log_item, cui_item); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); xfs_cui_release(cuip); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); } /* Recover the BUI if necessary. */ @@ -4781,9 +4776,9 @@ xlog_recover_process_bui( if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)) return 0; - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); error = xfs_bui_recover(mp, buip, dfops); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); return error; } @@ -4799,9 +4794,9 @@ xlog_recover_cancel_bui( buip = container_of(lip, struct xfs_bui_log_item, bui_item); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); xfs_bui_release(buip); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); } /* Is this log item a deferred action intent? */ @@ -4889,7 +4884,7 @@ xlog_recover_process_intents( #endif ailp = log->l_ailp; - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); #if defined(DEBUG) || defined(XFS_WARN) last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); @@ -4943,7 +4938,7 @@ xlog_recover_process_intents( } out: xfs_trans_ail_cursor_done(&cur); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); if (error) xfs_defer_cancel(&dfops); else @@ -4966,7 +4961,7 @@ xlog_recover_cancel_intents( struct xfs_ail *ailp; ailp = log->l_ailp; - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); while (lip != NULL) { /* @@ -5000,7 +4995,7 @@ xlog_recover_cancel_intents( } xfs_trans_ail_cursor_done(&cur); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); return error; } @@ -5127,16 +5122,9 @@ xlog_recover_process_iunlinks( xfs_agino_t agino; int bucket; int error; - uint mp_dmevmask; mp = log->l_mp; - /* - * Prevent any DMAPI event from being sent while in this function. - */ - mp_dmevmask = mp->m_dmevmask; - mp->m_dmevmask = 0; - for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { /* * Find the agi for this ag. @@ -5172,8 +5160,6 @@ xlog_recover_process_iunlinks( } xfs_buf_rele(agibp); } - - mp->m_dmevmask = mp_dmevmask; } STATIC int diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 98fd41cbb9e1..a901b86772f8 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -803,8 +803,6 @@ xfs_mountfs( get_unaligned_be16(&sbp->sb_uuid.b[4]); mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]); - mp->m_dmevmask = 0; /* not persistent; set after each mount */ - error = xfs_da_mount(mp); if (error) { xfs_warn(mp, "Failed dir/attr init: %d", error); @@ -819,8 +817,6 @@ xfs_mountfs( /* * Allocate and initialize the per-ag data. */ - spin_lock_init(&mp->m_perag_lock); - INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); if (error) { xfs_warn(mp, "Failed per-ag init: %d", error); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index e0792d036be2..10b90bbc5162 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -138,7 +138,6 @@ typedef struct xfs_mount { spinlock_t m_perag_lock; /* lock for m_perag_tree */ struct mutex m_growlock; /* growfs mutex */ int m_fixedfsid[2]; /* unchanged for life of FS */ - uint m_dmevmask; /* DMI events for this FS */ uint64_t m_flags; /* global mount flags */ bool m_inotbt_nores; /* no per-AG finobt resv. */ int m_ialloc_inos; /* inodes in inode allocation */ @@ -326,8 +325,9 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) /* per-AG block reservation data structures*/ enum xfs_ag_resv_type { XFS_AG_RESV_NONE = 0, - XFS_AG_RESV_METADATA, XFS_AG_RESV_AGFL, + XFS_AG_RESV_METADATA, + XFS_AG_RESV_RMAPBT, }; struct xfs_ag_resv { @@ -353,6 +353,7 @@ typedef struct xfs_perag { char pagi_inodeok; /* The agi is ok for inodes */ uint8_t pagf_levels[XFS_BTNUM_AGF]; /* # of levels in bno & cnt btree */ + bool pagf_agflreset; /* agfl requires reset before use */ uint32_t pagf_flcount; /* count of blocks in freelist */ xfs_extlen_t pagf_freeblks; /* total free blocks */ xfs_extlen_t pagf_longest; /* longest free space */ @@ -391,8 +392,8 @@ typedef struct xfs_perag { /* Blocks reserved for all kinds of metadata. */ struct xfs_ag_resv pag_meta_resv; - /* Blocks reserved for just AGFL-based metadata. */ - struct xfs_ag_resv pag_agfl_resv; + /* Blocks reserved for the reverse mapping btree. */ + struct xfs_ag_resv pag_rmapbt_resv; /* reference count */ uint8_t pagf_refcount_level; @@ -406,8 +407,8 @@ xfs_perag_resv( switch (type) { case XFS_AG_RESV_METADATA: return &pag->pag_meta_resv; - case XFS_AG_RESV_AGFL: - return &pag->pag_agfl_resv; + case XFS_AG_RESV_RMAPBT: + return &pag->pag_rmapbt_resv; default: return NULL; } diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 270246943a06..cdbd342a5249 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -394,7 +394,7 @@ xfs_reflink_allocate_cow( retry: ASSERT(xfs_is_reflink_inode(ip)); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); /* * Even if the extent is not shared we might have a preallocation for @@ -668,7 +668,7 @@ xfs_reflink_cancel_cow_range( /* Start a rolling transaction to remove the mappings */ error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, - 0, 0, 0, &tp); + 0, 0, XFS_TRANS_NOFS, &tp); if (error) goto out; @@ -741,7 +741,7 @@ xfs_reflink_end_cow( (unsigned int)(end_fsb - offset_fsb), XFS_DATA_FORK); error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, - resblks, 0, XFS_TRANS_RESERVE, &tp); + resblks, 0, XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp); if (error) goto out; @@ -762,10 +762,8 @@ xfs_reflink_end_cow( xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); /* Extent delete may have bumped ext forward */ - if (!del.br_blockcount) { - xfs_iext_prev(ifp, &icur); - goto next_extent; - } + if (!del.br_blockcount) + goto prev_extent; ASSERT(!isnullstartblock(got.br_startblock)); @@ -774,10 +772,8 @@ xfs_reflink_end_cow( * speculatively preallocated CoW extents that have been * allocated but have not yet been involved in a write. */ - if (got.br_state == XFS_EXT_UNWRITTEN) { - xfs_iext_prev(ifp, &icur); - goto next_extent; - } + if (got.br_state == XFS_EXT_UNWRITTEN) + goto prev_extent; /* Unmap the old blocks in the data fork. */ xfs_defer_init(&dfops, &firstfsb); @@ -816,9 +812,12 @@ xfs_reflink_end_cow( error = xfs_defer_finish(&tp, &dfops); if (error) goto out_defer; -next_extent: if (!xfs_iext_get_extent(ifp, &icur, &got)) break; + continue; +prev_extent: + if (!xfs_iext_prev_extent(ifp, &icur, &got)) + break; } error = xfs_trans_commit(tp); @@ -1061,7 +1060,7 @@ xfs_reflink_ag_has_free_space( return 0; pag = xfs_perag_get(mp, agno); - if (xfs_ag_resv_critical(pag, XFS_AG_RESV_AGFL) || + if (xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) || xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA)) error = -ENOSPC; xfs_perag_put(pag); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 93588ea3d3d2..612c1d5348b3 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -972,7 +972,6 @@ xfs_fs_destroy_inode( struct inode *inode) { struct xfs_inode *ip = XFS_I(inode); - int error; trace_xfs_destroy_inode(ip); @@ -980,14 +979,6 @@ xfs_fs_destroy_inode( XFS_STATS_INC(ip->i_mount, vn_rele); XFS_STATS_INC(ip->i_mount, vn_remove); - if (xfs_is_reflink_inode(ip)) { - error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); - if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) - xfs_warn(ip->i_mount, -"Error %d while evicting CoW blocks for inode %llu.", - error, ip->i_ino); - } - xfs_inactive(ip); ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); @@ -1009,6 +1000,28 @@ xfs_fs_destroy_inode( xfs_inode_set_reclaim_tag(ip); } +static void +xfs_fs_dirty_inode( + struct inode *inode, + int flag) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + + if (!(inode->i_sb->s_flags & SB_LAZYTIME)) + return; + if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME)) + return; + + if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp)) + return; + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); + xfs_trans_commit(tp); +} + /* * Slab object creation initialisation for the XFS inode. * This covers only the idempotent fields in the XFS inode; @@ -1566,6 +1579,31 @@ xfs_destroy_percpu_counters( percpu_counter_destroy(&mp->m_fdblocks); } +static struct xfs_mount * +xfs_mount_alloc( + struct super_block *sb) +{ + struct xfs_mount *mp; + + mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); + if (!mp) + return NULL; + + mp->m_super = sb; + spin_lock_init(&mp->m_sb_lock); + spin_lock_init(&mp->m_agirotor_lock); + INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); + spin_lock_init(&mp->m_perag_lock); + mutex_init(&mp->m_growlock); + atomic_set(&mp->m_active_trans, 0); + INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); + INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); + INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker); + mp->m_kobj.kobject.kset = xfs_kset; + return mp; +} + + STATIC int xfs_fs_fill_super( struct super_block *sb, @@ -1576,19 +1614,13 @@ xfs_fs_fill_super( struct xfs_mount *mp = NULL; int flags = 0, error = -ENOMEM; - mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); + /* + * allocate mp and do all low-level struct initializations before we + * attach it to the super + */ + mp = xfs_mount_alloc(sb); if (!mp) goto out; - - spin_lock_init(&mp->m_sb_lock); - mutex_init(&mp->m_growlock); - atomic_set(&mp->m_active_trans, 0); - INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); - INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); - INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker); - mp->m_kobj.kobject.kset = xfs_kset; - - mp->m_super = sb; sb->s_fs_info = mp; error = xfs_parseargs(mp, (char *)data); @@ -1789,6 +1821,7 @@ xfs_fs_free_cached_objects( static const struct super_operations xfs_super_operations = { .alloc_inode = xfs_fs_alloc_inode, .destroy_inode = xfs_fs_destroy_inode, + .dirty_inode = xfs_fs_dirty_inode, .drop_inode = xfs_fs_drop_inode, .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 945de08af7ba..a982c0b623d0 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1477,7 +1477,7 @@ TRACE_EVENT(xfs_extent_busy_trim, __entry->tlen) ); -TRACE_EVENT(xfs_agf, +DECLARE_EVENT_CLASS(xfs_agf_class, TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, unsigned long caller_ip), TP_ARGS(mp, agf, flags, caller_ip), @@ -1533,6 +1533,13 @@ TRACE_EVENT(xfs_agf, __entry->longest, (void *)__entry->caller_ip) ); +#define DEFINE_AGF_EVENT(name) \ +DEFINE_EVENT(xfs_agf_class, name, \ + TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, \ + unsigned long caller_ip), \ + TP_ARGS(mp, agf, flags, caller_ip)) +DEFINE_AGF_EVENT(xfs_agf); +DEFINE_AGF_EVENT(xfs_agfl_reset); TRACE_EVENT(xfs_free_extent, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 86f92df32c42..d6d8f9d129a7 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -119,8 +119,11 @@ xfs_trans_dup( /* We gave our writer reference to the new transaction */ tp->t_flags |= XFS_TRANS_NO_WRITECOUNT; ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); + + ASSERT(tp->t_blk_res >= tp->t_blk_res_used); ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; tp->t_blk_res = tp->t_blk_res_used; + ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; tp->t_rtx_res = tp->t_rtx_res_used; ntp->t_pflags = tp->t_pflags; @@ -344,13 +347,14 @@ xfs_trans_mod_sb( break; case XFS_TRANS_SB_FDBLOCKS: /* - * Track the number of blocks allocated in the - * transaction. Make sure it does not exceed the - * number reserved. + * Track the number of blocks allocated in the transaction. + * Make sure it does not exceed the number reserved. If so, + * shutdown as this can lead to accounting inconsistency. */ if (delta < 0) { tp->t_blk_res_used += (uint)-delta; - ASSERT(tp->t_blk_res_used <= tp->t_blk_res); + if (tp->t_blk_res_used > tp->t_blk_res) + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } tp->t_fdblocks_delta += delta; if (xfs_sb_version_haslazysbcount(&mp->m_sb)) @@ -803,8 +807,8 @@ xfs_log_item_batch_insert( { int i; - spin_lock(&ailp->xa_lock); - /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ + spin_lock(&ailp->ail_lock); + /* xfs_trans_ail_update_bulk drops ailp->ail_lock */ xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn); for (i = 0; i < nr_items; i++) { @@ -847,9 +851,9 @@ xfs_trans_committed_bulk( struct xfs_ail_cursor cur; int i = 0; - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); /* unpin all the log items */ for (lv = log_vector; lv; lv = lv->lv_next ) { @@ -869,7 +873,7 @@ xfs_trans_committed_bulk( * object into the AIL as we are in a shutdown situation. */ if (aborted) { - ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount)); + ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount)); lip->li_ops->iop_unpin(lip, 1); continue; } @@ -883,11 +887,11 @@ xfs_trans_committed_bulk( * not affect the AIL cursor the bulk insert path is * using. */ - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) xfs_trans_ail_update(ailp, lip, item_lsn); else - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); lip->li_ops->iop_unpin(lip, 0); continue; } @@ -905,9 +909,9 @@ xfs_trans_committed_bulk( if (i) xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); xfs_trans_ail_cursor_done(&cur); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); } /* @@ -966,7 +970,7 @@ __xfs_trans_commit( * log out now and wait for it. */ if (sync) { - error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); + error = xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); XFS_STATS_INC(mp, xs_trans_sync); } else { XFS_STATS_INC(mp, xs_trans_async); diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index cef89f7127d3..d4a2445215e6 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -40,7 +40,7 @@ xfs_ail_check( { xfs_log_item_t *prev_lip; - if (list_empty(&ailp->xa_ail)) + if (list_empty(&ailp->ail_head)) return; /* @@ -48,11 +48,11 @@ xfs_ail_check( */ ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); - if (&prev_lip->li_ail != &ailp->xa_ail) + if (&prev_lip->li_ail != &ailp->ail_head) ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); - if (&prev_lip->li_ail != &ailp->xa_ail) + if (&prev_lip->li_ail != &ailp->ail_head) ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); @@ -69,10 +69,10 @@ static xfs_log_item_t * xfs_ail_max( struct xfs_ail *ailp) { - if (list_empty(&ailp->xa_ail)) + if (list_empty(&ailp->ail_head)) return NULL; - return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail); + return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail); } /* @@ -84,7 +84,7 @@ xfs_ail_next( struct xfs_ail *ailp, xfs_log_item_t *lip) { - if (lip->li_ail.next == &ailp->xa_ail) + if (lip->li_ail.next == &ailp->ail_head) return NULL; return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); @@ -105,11 +105,11 @@ xfs_ail_min_lsn( xfs_lsn_t lsn = 0; xfs_log_item_t *lip; - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); lip = xfs_ail_min(ailp); if (lip) lsn = lip->li_lsn; - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); return lsn; } @@ -124,11 +124,11 @@ xfs_ail_max_lsn( xfs_lsn_t lsn = 0; xfs_log_item_t *lip; - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); lip = xfs_ail_max(ailp); if (lip) lsn = lip->li_lsn; - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); return lsn; } @@ -146,7 +146,7 @@ xfs_trans_ail_cursor_init( struct xfs_ail_cursor *cur) { cur->item = NULL; - list_add_tail(&cur->list, &ailp->xa_cursors); + list_add_tail(&cur->list, &ailp->ail_cursors); } /* @@ -194,7 +194,7 @@ xfs_trans_ail_cursor_clear( { struct xfs_ail_cursor *cur; - list_for_each_entry(cur, &ailp->xa_cursors, list) { + list_for_each_entry(cur, &ailp->ail_cursors, list) { if (cur->item == lip) cur->item = (struct xfs_log_item *) ((uintptr_t)cur->item | 1); @@ -222,7 +222,7 @@ xfs_trans_ail_cursor_first( goto out; } - list_for_each_entry(lip, &ailp->xa_ail, li_ail) { + list_for_each_entry(lip, &ailp->ail_head, li_ail) { if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) goto out; } @@ -241,7 +241,7 @@ __xfs_trans_ail_cursor_last( { xfs_log_item_t *lip; - list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) { + list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) { if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0) return lip; } @@ -310,7 +310,7 @@ xfs_ail_splice( if (lip) list_splice(list, &lip->li_ail); else - list_splice(list, &ailp->xa_ail); + list_splice(list, &ailp->ail_head); } /* @@ -335,17 +335,17 @@ xfsaild_push_item( * If log item pinning is enabled, skip the push and track the item as * pinned. This can help induce head-behind-tail conditions. */ - if (XFS_TEST_ERROR(false, ailp->xa_mount, XFS_ERRTAG_LOG_ITEM_PIN)) + if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN)) return XFS_ITEM_PINNED; - return lip->li_ops->iop_push(lip, &ailp->xa_buf_list); + return lip->li_ops->iop_push(lip, &ailp->ail_buf_list); } static long xfsaild_push( struct xfs_ail *ailp) { - xfs_mount_t *mp = ailp->xa_mount; + xfs_mount_t *mp = ailp->ail_mount; struct xfs_ail_cursor cur; xfs_log_item_t *lip; xfs_lsn_t lsn; @@ -360,30 +360,30 @@ xfsaild_push( * buffers the last time we ran, force the log first and wait for it * before pushing again. */ - if (ailp->xa_log_flush && ailp->xa_last_pushed_lsn == 0 && - (!list_empty_careful(&ailp->xa_buf_list) || + if (ailp->ail_log_flush && ailp->ail_last_pushed_lsn == 0 && + (!list_empty_careful(&ailp->ail_buf_list) || xfs_ail_min_lsn(ailp))) { - ailp->xa_log_flush = 0; + ailp->ail_log_flush = 0; XFS_STATS_INC(mp, xs_push_ail_flush); xfs_log_force(mp, XFS_LOG_SYNC); } - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); - /* barrier matches the xa_target update in xfs_ail_push() */ + /* barrier matches the ail_target update in xfs_ail_push() */ smp_rmb(); - target = ailp->xa_target; - ailp->xa_target_prev = target; + target = ailp->ail_target; + ailp->ail_target_prev = target; - lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); + lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->ail_last_pushed_lsn); if (!lip) { /* * If the AIL is empty or our push has reached the end we are * done now. */ xfs_trans_ail_cursor_done(&cur); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); goto out_done; } @@ -404,7 +404,7 @@ xfsaild_push( XFS_STATS_INC(mp, xs_push_ail_success); trace_xfs_ail_push(lip); - ailp->xa_last_pushed_lsn = lsn; + ailp->ail_last_pushed_lsn = lsn; break; case XFS_ITEM_FLUSHING: @@ -423,7 +423,7 @@ xfsaild_push( trace_xfs_ail_flushing(lip); flushing++; - ailp->xa_last_pushed_lsn = lsn; + ailp->ail_last_pushed_lsn = lsn; break; case XFS_ITEM_PINNED: @@ -431,7 +431,7 @@ xfsaild_push( trace_xfs_ail_pinned(lip); stuck++; - ailp->xa_log_flush++; + ailp->ail_log_flush++; break; case XFS_ITEM_LOCKED: XFS_STATS_INC(mp, xs_push_ail_locked); @@ -468,10 +468,10 @@ xfsaild_push( lsn = lip->li_lsn; } xfs_trans_ail_cursor_done(&cur); - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); - if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list)) - ailp->xa_log_flush++; + if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list)) + ailp->ail_log_flush++; if (!count || XFS_LSN_CMP(lsn, target) >= 0) { out_done: @@ -481,7 +481,7 @@ out_done: * AIL before we start the next scan from the start of the AIL. */ tout = 50; - ailp->xa_last_pushed_lsn = 0; + ailp->ail_last_pushed_lsn = 0; } else if (((stuck + flushing) * 100) / count > 90) { /* * Either there is a lot of contention on the AIL or we are @@ -494,7 +494,7 @@ out_done: * the restart to issue a log force to unpin the stuck items. */ tout = 20; - ailp->xa_last_pushed_lsn = 0; + ailp->ail_last_pushed_lsn = 0; } else { /* * Assume we have more work to do in a short while. @@ -536,26 +536,26 @@ xfsaild( break; } - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); /* * Idle if the AIL is empty and we are not racing with a target * update. We check the AIL after we set the task to a sleep - * state to guarantee that we either catch an xa_target update + * state to guarantee that we either catch an ail_target update * or that a wake_up resets the state to TASK_RUNNING. * Otherwise, we run the risk of sleeping indefinitely. * - * The barrier matches the xa_target update in xfs_ail_push(). + * The barrier matches the ail_target update in xfs_ail_push(). */ smp_rmb(); if (!xfs_ail_min(ailp) && - ailp->xa_target == ailp->xa_target_prev) { - spin_unlock(&ailp->xa_lock); + ailp->ail_target == ailp->ail_target_prev) { + spin_unlock(&ailp->ail_lock); freezable_schedule(); tout = 0; continue; } - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); if (tout) freezable_schedule_timeout(msecs_to_jiffies(tout)); @@ -592,8 +592,8 @@ xfs_ail_push( xfs_log_item_t *lip; lip = xfs_ail_min(ailp); - if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) || - XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0) + if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) || + XFS_LSN_CMP(threshold_lsn, ailp->ail_target) <= 0) return; /* @@ -601,10 +601,10 @@ xfs_ail_push( * the XFS_AIL_PUSHING_BIT. */ smp_wmb(); - xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn); + xfs_trans_ail_copy_lsn(ailp, &ailp->ail_target, &threshold_lsn); smp_wmb(); - wake_up_process(ailp->xa_task); + wake_up_process(ailp->ail_task); } /* @@ -630,18 +630,18 @@ xfs_ail_push_all_sync( struct xfs_log_item *lip; DEFINE_WAIT(wait); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); while ((lip = xfs_ail_max(ailp)) != NULL) { - prepare_to_wait(&ailp->xa_empty, &wait, TASK_UNINTERRUPTIBLE); - ailp->xa_target = lip->li_lsn; - wake_up_process(ailp->xa_task); - spin_unlock(&ailp->xa_lock); + prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE); + ailp->ail_target = lip->li_lsn; + wake_up_process(ailp->ail_task); + spin_unlock(&ailp->ail_lock); schedule(); - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); } - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); - finish_wait(&ailp->xa_empty, &wait); + finish_wait(&ailp->ail_empty, &wait); } /* @@ -672,7 +672,7 @@ xfs_trans_ail_update_bulk( struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, - xfs_lsn_t lsn) __releases(ailp->xa_lock) + xfs_lsn_t lsn) __releases(ailp->ail_lock) { xfs_log_item_t *mlip; int mlip_changed = 0; @@ -705,13 +705,13 @@ xfs_trans_ail_update_bulk( xfs_ail_splice(ailp, cur, &tmp, lsn); if (mlip_changed) { - if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount)) - xlog_assign_tail_lsn_locked(ailp->xa_mount); - spin_unlock(&ailp->xa_lock); + if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount)) + xlog_assign_tail_lsn_locked(ailp->ail_mount); + spin_unlock(&ailp->ail_lock); - xfs_log_space_wake(ailp->xa_mount); + xfs_log_space_wake(ailp->ail_mount); } else { - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); } } @@ -756,13 +756,13 @@ void xfs_trans_ail_delete( struct xfs_ail *ailp, struct xfs_log_item *lip, - int shutdown_type) __releases(ailp->xa_lock) + int shutdown_type) __releases(ailp->ail_lock) { - struct xfs_mount *mp = ailp->xa_mount; + struct xfs_mount *mp = ailp->ail_mount; bool mlip_changed; if (!(lip->li_flags & XFS_LI_IN_AIL)) { - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); if (!XFS_FORCED_SHUTDOWN(mp)) { xfs_alert_tag(mp, XFS_PTAG_AILDELETE, "%s: attempting to delete a log item that is not in the AIL", @@ -776,13 +776,13 @@ xfs_trans_ail_delete( if (mlip_changed) { if (!XFS_FORCED_SHUTDOWN(mp)) xlog_assign_tail_lsn_locked(mp); - if (list_empty(&ailp->xa_ail)) - wake_up_all(&ailp->xa_empty); + if (list_empty(&ailp->ail_head)) + wake_up_all(&ailp->ail_empty); } - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); if (mlip_changed) - xfs_log_space_wake(ailp->xa_mount); + xfs_log_space_wake(ailp->ail_mount); } int @@ -795,16 +795,16 @@ xfs_trans_ail_init( if (!ailp) return -ENOMEM; - ailp->xa_mount = mp; - INIT_LIST_HEAD(&ailp->xa_ail); - INIT_LIST_HEAD(&ailp->xa_cursors); - spin_lock_init(&ailp->xa_lock); - INIT_LIST_HEAD(&ailp->xa_buf_list); - init_waitqueue_head(&ailp->xa_empty); + ailp->ail_mount = mp; + INIT_LIST_HEAD(&ailp->ail_head); + INIT_LIST_HEAD(&ailp->ail_cursors); + spin_lock_init(&ailp->ail_lock); + INIT_LIST_HEAD(&ailp->ail_buf_list); + init_waitqueue_head(&ailp->ail_empty); - ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", - ailp->xa_mount->m_fsname); - if (IS_ERR(ailp->xa_task)) + ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s", + ailp->ail_mount->m_fsname); + if (IS_ERR(ailp->ail_task)) goto out_free_ailp; mp->m_ail = ailp; @@ -821,6 +821,6 @@ xfs_trans_ail_destroy( { struct xfs_ail *ailp = mp->m_ail; - kthread_stop(ailp->xa_task); + kthread_stop(ailp->ail_task); kmem_free(ailp); } diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 653ce379d36b..a5d9dfc45d98 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -431,8 +431,8 @@ xfs_trans_brelse( * If the fs has shutdown and we dropped the last reference, it may fall * on us to release a (possibly dirty) bli if it never made it to the * AIL (e.g., the aborted unpin already happened and didn't release it - * due to our reference). Since we're already shutdown and need xa_lock, - * just force remove from the AIL and release the bli here. + * due to our reference). Since we're already shutdown and need + * ail_lock, just force remove from the AIL and release the bli here. */ if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) { xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR); diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 4a89da4b6fe7..07cea592dc01 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -98,9 +98,23 @@ xfs_trans_log_inode( xfs_inode_t *ip, uint flags) { + struct inode *inode = VFS_I(ip); + ASSERT(ip->i_itemp != NULL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + /* + * Don't bother with i_lock for the I_DIRTY_TIME check here, as races + * don't matter - we either will need an extra transaction in 24 hours + * to log the timestamps, or will clear already cleared fields in the + * worst case. + */ + if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) { + spin_lock(&inode->i_lock); + inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED); + spin_unlock(&inode->i_lock); + } + /* * Record the specific change for fdatasync optimisation. This * allows fdatasync to skip log forces for inodes that are only diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index b317a3644c00..be24b0c8a332 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -65,17 +65,17 @@ struct xfs_ail_cursor { * Eventually we need to drive the locking in here as well. */ struct xfs_ail { - struct xfs_mount *xa_mount; - struct task_struct *xa_task; - struct list_head xa_ail; - xfs_lsn_t xa_target; - xfs_lsn_t xa_target_prev; - struct list_head xa_cursors; - spinlock_t xa_lock; - xfs_lsn_t xa_last_pushed_lsn; - int xa_log_flush; - struct list_head xa_buf_list; - wait_queue_head_t xa_empty; + struct xfs_mount *ail_mount; + struct task_struct *ail_task; + struct list_head ail_head; + xfs_lsn_t ail_target; + xfs_lsn_t ail_target_prev; + struct list_head ail_cursors; + spinlock_t ail_lock; + xfs_lsn_t ail_last_pushed_lsn; + int ail_log_flush; + struct list_head ail_buf_list; + wait_queue_head_t ail_empty; }; /* @@ -84,7 +84,7 @@ struct xfs_ail { void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, - xfs_lsn_t lsn) __releases(ailp->xa_lock); + xfs_lsn_t lsn) __releases(ailp->ail_lock); /* * Return a pointer to the first item in the AIL. If the AIL is empty, then * return NULL. @@ -93,7 +93,7 @@ static inline struct xfs_log_item * xfs_ail_min( struct xfs_ail *ailp) { - return list_first_entry_or_null(&ailp->xa_ail, struct xfs_log_item, + return list_first_entry_or_null(&ailp->ail_head, struct xfs_log_item, li_ail); } @@ -101,14 +101,14 @@ static inline void xfs_trans_ail_update( struct xfs_ail *ailp, struct xfs_log_item *lip, - xfs_lsn_t lsn) __releases(ailp->xa_lock) + xfs_lsn_t lsn) __releases(ailp->ail_lock) { xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn); } bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip); void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip, - int shutdown_type) __releases(ailp->xa_lock); + int shutdown_type) __releases(ailp->ail_lock); static inline void xfs_trans_ail_remove( @@ -117,12 +117,12 @@ xfs_trans_ail_remove( { struct xfs_ail *ailp = lip->li_ailp; - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); /* xfs_trans_ail_delete() drops the AIL lock */ if (lip->li_flags & XFS_LI_IN_AIL) xfs_trans_ail_delete(ailp, lip, shutdown_type); else - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); } void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); @@ -149,9 +149,9 @@ xfs_trans_ail_copy_lsn( xfs_lsn_t *src) { ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */ - spin_lock(&ailp->xa_lock); + spin_lock(&ailp->ail_lock); *dst = *src; - spin_unlock(&ailp->xa_lock); + spin_unlock(&ailp->ail_lock); } #else static inline void @@ -172,7 +172,7 @@ xfs_clear_li_failed( struct xfs_buf *bp = lip->li_buf; ASSERT(lip->li_flags & XFS_LI_IN_AIL); - lockdep_assert_held(&lip->li_ailp->xa_lock); + lockdep_assert_held(&lip->li_ailp->ail_lock); if (lip->li_flags & XFS_LI_FAILED) { lip->li_flags &= ~XFS_LI_FAILED; @@ -186,7 +186,7 @@ xfs_set_li_failed( struct xfs_log_item *lip, struct xfs_buf *bp) { - lockdep_assert_held(&lip->li_ailp->xa_lock); + lockdep_assert_held(&lip->li_ailp->ail_lock); if (!(lip->li_flags & XFS_LI_FAILED)) { xfs_buf_hold(bp);