diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index e841ed781a25..e986b95d94c9 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -93,25 +93,3 @@ kmem_alloc_large(size_t size, xfs_km_flags_t flags) return ptr; return __kmem_vmalloc(size, flags); } - -void * -kmem_realloc(const void *old, size_t newsize, xfs_km_flags_t flags) -{ - int retries = 0; - gfp_t lflags = kmem_flags_convert(flags); - void *ptr; - - trace_kmem_realloc(newsize, flags, _RET_IP_); - - do { - ptr = krealloc(old, newsize, lflags); - if (ptr || (flags & KM_MAYFAIL)) - return ptr; - if (!(++retries % 100)) - xfs_err(NULL, - "%s(%u) possible memory allocation deadlock size %zu in %s (mode:0x%x)", - current->comm, current->pid, - newsize, __func__, lflags); - congestion_wait(BLK_RW_ASYNC, HZ/50); - } while (1); -} diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 8e8555817e6d..38007117697e 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -59,7 +59,6 @@ kmem_flags_convert(xfs_km_flags_t flags) extern void *kmem_alloc(size_t, xfs_km_flags_t); extern void *kmem_alloc_io(size_t size, int align_mask, xfs_km_flags_t flags); extern void *kmem_alloc_large(size_t size, xfs_km_flags_t); -extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t); static inline void kmem_free(const void *ptr) { kvfree(ptr); @@ -72,12 +71,6 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags) return kmem_alloc(size, flags | KM_ZERO); } -static inline void * -kmem_zalloc_large(size_t size, xfs_km_flags_t flags) -{ - return kmem_alloc_large(size, flags | KM_ZERO); -} - /* * Zone interfaces */ diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 8cf73fe4338e..9331f3516afa 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -333,6 +333,11 @@ xfs_agiblock_init( } for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); + if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) { + agi->agi_iblocks = cpu_to_be32(1); + if (xfs_sb_version_hasfinobt(&mp->m_sb)) + agi->agi_fblocks = cpu_to_be32(1); + } } typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp, diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 2e055c079f39..fd8e6418a0d3 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -428,7 +428,7 @@ xfs_attr_set( */ if (XFS_IFORK_Q(dp) == 0) { int sf_size = sizeof(struct xfs_attr_sf_hdr) + - XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, + xfs_attr_sf_entsize_byname(args->namelen, args->valuelen); error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); @@ -523,6 +523,14 @@ out_trans_cancel: * External routines when attribute list is inside the inode *========================================================================*/ +static inline int xfs_attr_sf_totsize(struct xfs_inode *dp) +{ + struct xfs_attr_shortform *sf; + + sf = (struct xfs_attr_shortform *)dp->i_afp->if_u1.if_data; + return be16_to_cpu(sf->hdr.totsize); +} + /* * Add a name to the shortform attribute list structure * This is the external routine. @@ -555,8 +563,8 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX) return -ENOSPC; - newsize = XFS_ATTR_SF_TOTSIZE(args->dp); - newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); + newsize = xfs_attr_sf_totsize(args->dp); + newsize += xfs_attr_sf_entsize_byname(args->namelen, args->valuelen); forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize); if (!forkoff) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 305d4bc07337..bb128db220ac 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -684,9 +684,9 @@ xfs_attr_sf_findname( sf = (struct xfs_attr_shortform *)args->dp->i_afp->if_u1.if_data; sfe = &sf->list[0]; end = sf->hdr.count; - for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), + for (i = 0; i < end; sfe = xfs_attr_sf_nextentry(sfe), base += size, i++) { - size = XFS_ATTR_SF_ENTSIZE(sfe); + size = xfs_attr_sf_entsize(sfe); if (!xfs_attr_match(args, sfe->namelen, sfe->nameval, sfe->flags)) continue; @@ -728,15 +728,15 @@ xfs_attr_shortform_add( ifp = dp->i_afp; ASSERT(ifp->if_flags & XFS_IFINLINE); - sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; + sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data; if (xfs_attr_sf_findname(args, &sfe, NULL) == -EEXIST) ASSERT(0); offset = (char *)sfe - (char *)sf; - size = XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); + size = xfs_attr_sf_entsize_byname(args->namelen, args->valuelen); xfs_idata_realloc(dp, size, XFS_ATTR_FORK); - sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; - sfe = (xfs_attr_sf_entry_t *)((char *)sf + offset); + sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data; + sfe = (struct xfs_attr_sf_entry *)((char *)sf + offset); sfe->namelen = args->namelen; sfe->valuelen = args->valuelen; @@ -787,12 +787,12 @@ xfs_attr_shortform_remove( dp = args->dp; mp = dp->i_mount; - sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data; + sf = (struct xfs_attr_shortform *)dp->i_afp->if_u1.if_data; error = xfs_attr_sf_findname(args, &sfe, &base); if (error != -EEXIST) return error; - size = XFS_ATTR_SF_ENTSIZE(sfe); + size = xfs_attr_sf_entsize(sfe); /* * Fix up the attribute fork data, covering the hole @@ -837,8 +837,8 @@ xfs_attr_shortform_remove( int xfs_attr_shortform_lookup(xfs_da_args_t *args) { - xfs_attr_shortform_t *sf; - xfs_attr_sf_entry_t *sfe; + struct xfs_attr_shortform *sf; + struct xfs_attr_sf_entry *sfe; int i; struct xfs_ifork *ifp; @@ -846,10 +846,10 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) ifp = args->dp->i_afp; ASSERT(ifp->if_flags & XFS_IFINLINE); - sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; + sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data; sfe = &sf->list[0]; for (i = 0; i < sf->hdr.count; - sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { + sfe = xfs_attr_sf_nextentry(sfe), i++) { if (xfs_attr_match(args, sfe->namelen, sfe->nameval, sfe->flags)) return -EEXIST; @@ -873,10 +873,10 @@ xfs_attr_shortform_getvalue( int i; ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE); - sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data; + sf = (struct xfs_attr_shortform *)args->dp->i_afp->if_u1.if_data; sfe = &sf->list[0]; for (i = 0; i < sf->hdr.count; - sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { + sfe = xfs_attr_sf_nextentry(sfe), i++) { if (xfs_attr_match(args, sfe->namelen, sfe->nameval, sfe->flags)) return xfs_attr_copy_value(args, @@ -908,12 +908,12 @@ xfs_attr_shortform_to_leaf( dp = args->dp; ifp = dp->i_afp; - sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; + sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data; size = be16_to_cpu(sf->hdr.totsize); tmpbuffer = kmem_alloc(size, 0); ASSERT(tmpbuffer != NULL); memcpy(tmpbuffer, ifp->if_u1.if_data, size); - sf = (xfs_attr_shortform_t *)tmpbuffer; + sf = (struct xfs_attr_shortform *)tmpbuffer; xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); xfs_bmap_local_to_extents_empty(args->trans, dp, XFS_ATTR_FORK); @@ -951,7 +951,7 @@ xfs_attr_shortform_to_leaf( ASSERT(error != -ENOSPC); if (error) goto out; - sfe = XFS_ATTR_SF_NEXTENTRY(sfe); + sfe = xfs_attr_sf_nextentry(sfe); } error = 0; *leaf_bp = bp; @@ -992,9 +992,8 @@ xfs_attr_shortform_allfit( return 0; if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) return 0; - bytes += sizeof(struct xfs_attr_sf_entry) - 1 - + name_loc->namelen - + be16_to_cpu(name_loc->valuelen); + bytes += xfs_attr_sf_entsize_byname(name_loc->namelen, + be16_to_cpu(name_loc->valuelen)); } if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) && (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) && @@ -1039,7 +1038,7 @@ xfs_attr_shortform_verify( * xfs_attr_sf_entry is defined with a 1-byte variable * array at the end, so we must subtract that off. */ - if (((char *)sfep + sizeof(*sfep) - 1) >= endp) + if (((char *)sfep + sizeof(*sfep)) >= endp) return __this_address; /* Don't allow names with known bad length. */ @@ -1051,7 +1050,7 @@ xfs_attr_shortform_verify( * within the data buffer. The next entry starts after the * name component, so nextentry is an acceptable test. */ - next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep); + next_sfep = xfs_attr_sf_nextentry(sfep); if ((char *)next_sfep > endp) return __this_address; diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h index bb004fb7944a..37578b369d9b 100644 --- a/fs/xfs/libxfs/xfs_attr_sf.h +++ b/fs/xfs/libxfs/xfs_attr_sf.h @@ -13,7 +13,6 @@ * to fit into the literal area of the inode. */ typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t; -typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t; /* * We generate this then sort it, attr_list() must return things in hash-order. @@ -27,16 +26,26 @@ typedef struct xfs_attr_sf_sort { unsigned char *name; /* name value, pointer into buffer */ } xfs_attr_sf_sort_t; -#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \ - (((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen))) #define XFS_ATTR_SF_ENTSIZE_MAX /* max space for name&value */ \ ((1 << (NBBY*(int)sizeof(uint8_t))) - 1) -#define XFS_ATTR_SF_ENTSIZE(sfep) /* space an entry uses */ \ - ((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen) -#define XFS_ATTR_SF_NEXTENTRY(sfep) /* next entry in struct */ \ - ((xfs_attr_sf_entry_t *)((char *)(sfep) + XFS_ATTR_SF_ENTSIZE(sfep))) -#define XFS_ATTR_SF_TOTSIZE(dp) /* total space in use */ \ - (be16_to_cpu(((xfs_attr_shortform_t *) \ - ((dp)->i_afp->if_u1.if_data))->hdr.totsize)) + +/* space name/value uses */ +static inline int xfs_attr_sf_entsize_byname(uint8_t nlen, uint8_t vlen) +{ + return sizeof(struct xfs_attr_sf_entry) + nlen + vlen; +} + +/* space an entry uses */ +static inline int xfs_attr_sf_entsize(struct xfs_attr_sf_entry *sfep) +{ + return struct_size(sfep, nameval, sfep->namelen + sfep->valuelen); +} + +/* next entry in struct */ +static inline struct xfs_attr_sf_entry * +xfs_attr_sf_nextentry(struct xfs_attr_sf_entry *sfep) +{ + return (void *)sfep + xfs_attr_sf_entsize(sfep); +} #endif /* __XFS_ATTR_SF_H__ */ diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 059ac108b1b3..b40a4e80f5ee 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -579,7 +579,7 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) /* * Entries are packed toward the top as tight as possible. */ -typedef struct xfs_attr_shortform { +struct xfs_attr_shortform { struct xfs_attr_sf_hdr { /* constant-structure header block */ __be16 totsize; /* total bytes in shortform list */ __u8 count; /* count of active entries */ @@ -589,9 +589,9 @@ typedef struct xfs_attr_shortform { uint8_t namelen; /* actual length of name (no NULL) */ uint8_t valuelen; /* actual length of value (no NULL) */ uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ - uint8_t nameval[1]; /* name & value bytes concatenated */ + uint8_t nameval[]; /* name & value bytes concatenated */ } list[1]; /* variable sized array */ -} xfs_attr_shortform_t; +}; typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */ __be16 base; /* base of free region */ diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 5a2db00b9d5f..6766417d5ba4 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -69,6 +69,13 @@ xfs_dquot_verify( ddq_type != XFS_DQTYPE_GROUP) return __this_address; + if ((ddq->d_type & XFS_DQTYPE_BIGTIME) && + !xfs_sb_version_hasbigtime(&mp->m_sb)) + return __this_address; + + if ((ddq->d_type & XFS_DQTYPE_BIGTIME) && !ddq->d_id) + return __this_address; + if (id != -1 && id != be32_to_cpu(ddq->d_id)) return __this_address; @@ -288,3 +295,31 @@ const struct xfs_buf_ops xfs_dquot_buf_ra_ops = { .verify_read = xfs_dquot_buf_readahead_verify, .verify_write = xfs_dquot_buf_write_verify, }; + +/* Convert an on-disk timer value into an incore timer value. */ +time64_t +xfs_dquot_from_disk_ts( + struct xfs_disk_dquot *ddq, + __be32 dtimer) +{ + uint32_t t = be32_to_cpu(dtimer); + + if (t != 0 && (ddq->d_type & XFS_DQTYPE_BIGTIME)) + return xfs_dq_bigtime_to_unix(t); + + return t; +} + +/* Convert an incore timer value into an on-disk timer value. */ +__be32 +xfs_dquot_to_disk_ts( + struct xfs_dquot *dqp, + time64_t timer) +{ + uint32_t t = timer; + + if (timer != 0 && (dqp->q_type & XFS_DQTYPE_BIGTIME)) + t = xfs_dq_unix_to_bigtime(timer); + + return cpu_to_be32(t); +} diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 31b7ece985bb..dd764da08f6f 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -449,10 +449,12 @@ xfs_sb_has_compat_feature( #define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ #define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */ #define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */ +#define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */ #define XFS_SB_FEAT_RO_COMPAT_ALL \ (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ XFS_SB_FEAT_RO_COMPAT_RMAPBT | \ - XFS_SB_FEAT_RO_COMPAT_REFLINK) + XFS_SB_FEAT_RO_COMPAT_REFLINK| \ + XFS_SB_FEAT_RO_COMPAT_INOBTCNT) #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL static inline bool xfs_sb_has_ro_compat_feature( @@ -465,10 +467,12 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */ #define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */ #define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2) /* metadata UUID */ +#define XFS_SB_FEAT_INCOMPAT_BIGTIME (1 << 3) /* large timestamps */ #define XFS_SB_FEAT_INCOMPAT_ALL \ (XFS_SB_FEAT_INCOMPAT_FTYPE| \ XFS_SB_FEAT_INCOMPAT_SPINODES| \ - XFS_SB_FEAT_INCOMPAT_META_UUID) + XFS_SB_FEAT_INCOMPAT_META_UUID| \ + XFS_SB_FEAT_INCOMPAT_BIGTIME) #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL static inline bool @@ -563,6 +567,23 @@ static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp) (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK); } +static inline bool xfs_sb_version_hasbigtime(struct xfs_sb *sbp) +{ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_BIGTIME); +} + +/* + * Inode btree block counter. We record the number of inobt and finobt blocks + * in the AGI header so that we can skip the finobt walk at mount time when + * setting up per-AG reservations. + */ +static inline bool xfs_sb_version_hasinobtcounts(struct xfs_sb *sbp) +{ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT); +} + /* * end of superblock version macros */ @@ -765,6 +786,9 @@ typedef struct xfs_agi { __be32 agi_free_root; /* root of the free inode btree */ __be32 agi_free_level;/* levels in free inode btree */ + __be32 agi_iblocks; /* inobt blocks used */ + __be32 agi_fblocks; /* finobt blocks used */ + /* structure must be padded to 64 bit alignment */ } xfs_agi_t; @@ -785,7 +809,8 @@ typedef struct xfs_agi { #define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1) #define XFS_AGI_FREE_ROOT (1 << 11) #define XFS_AGI_FREE_LEVEL (1 << 12) -#define XFS_AGI_NUM_BITS_R2 13 +#define XFS_AGI_IBLOCKS (1 << 13) /* both inobt/finobt block counters */ +#define XFS_AGI_NUM_BITS_R2 14 /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) @@ -831,10 +856,87 @@ struct xfs_agfl { ASSERT(xfs_daddr_to_agno(mp, d) == \ xfs_daddr_to_agno(mp, (d) + (len) - 1))) -typedef struct xfs_timestamp { +/* + * XFS Timestamps + * ============== + * + * Traditional ondisk inode timestamps consist of signed 32-bit counters for + * seconds and nanoseconds; time zero is the Unix epoch, Jan 1 00:00:00 UTC + * 1970, which means that the timestamp epoch is the same as the Unix epoch. + * Therefore, the ondisk min and max defined here can be used directly to + * constrain the incore timestamps on a Unix system. Note that we actually + * encode a __be64 value on disk. + * + * When the bigtime feature is enabled, ondisk inode timestamps become an + * unsigned 64-bit nanoseconds counter. This means that the bigtime inode + * timestamp epoch is the start of the classic timestamp range, which is + * Dec 31 20:45:52 UTC 1901. Because the epochs are not the same, callers + * /must/ use the bigtime conversion functions when encoding and decoding raw + * timestamps. + */ +typedef __be64 xfs_timestamp_t; + +/* Legacy timestamp encoding format. */ +struct xfs_legacy_timestamp { __be32 t_sec; /* timestamp seconds */ __be32 t_nsec; /* timestamp nanoseconds */ -} xfs_timestamp_t; +}; + +/* + * Smallest possible ondisk seconds value with traditional timestamps. This + * corresponds exactly with the incore timestamp Dec 13 20:45:52 UTC 1901. + */ +#define XFS_LEGACY_TIME_MIN ((int64_t)S32_MIN) + +/* + * Largest possible ondisk seconds value with traditional timestamps. This + * corresponds exactly with the incore timestamp Jan 19 03:14:07 UTC 2038. + */ +#define XFS_LEGACY_TIME_MAX ((int64_t)S32_MAX) + +/* + * Smallest possible ondisk seconds value with bigtime timestamps. This + * corresponds (after conversion to a Unix timestamp) with the traditional + * minimum timestamp of Dec 13 20:45:52 UTC 1901. + */ +#define XFS_BIGTIME_TIME_MIN ((int64_t)0) + +/* + * Largest supported ondisk seconds value with bigtime timestamps. This + * corresponds (after conversion to a Unix timestamp) with an incore timestamp + * of Jul 2 20:20:24 UTC 2486. + * + * We round down the ondisk limit so that the bigtime quota and inode max + * timestamps will be the same. + */ +#define XFS_BIGTIME_TIME_MAX ((int64_t)((-1ULL / NSEC_PER_SEC) & ~0x3ULL)) + +/* + * Bigtime epoch is set exactly to the minimum time value that a traditional + * 32-bit timestamp can represent when using the Unix epoch as a reference. + * Hence the Unix epoch is at a fixed offset into the supported bigtime + * timestamp range. + * + * The bigtime epoch also matches the minimum value an on-disk 32-bit XFS + * timestamp can represent so we will not lose any fidelity in converting + * to/from unix and bigtime timestamps. + * + * The following conversion factor converts a seconds counter from the Unix + * epoch to the bigtime epoch. + */ +#define XFS_BIGTIME_EPOCH_OFFSET (-(int64_t)S32_MIN) + +/* Convert a timestamp from the Unix epoch to the bigtime epoch. */ +static inline uint64_t xfs_unix_to_bigtime(time64_t unix_seconds) +{ + return (uint64_t)unix_seconds + XFS_BIGTIME_EPOCH_OFFSET; +} + +/* Convert a timestamp from the bigtime epoch to the Unix epoch. */ +static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds) +{ + return (time64_t)ondisk_seconds - XFS_BIGTIME_EPOCH_OFFSET; +} /* * On-disk inode structure. @@ -1061,12 +1163,22 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */ #define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */ #define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */ +#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */ + #define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) #define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) #define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT) +#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT) #define XFS_DIFLAG2_ANY \ - (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE) + (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ + XFS_DIFLAG2_BIGTIME) + +static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip) +{ + return dip->di_version >= 3 && + (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_BIGTIME)); +} /* * Inode number format: @@ -1152,13 +1264,98 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DQTYPE_USER 0x01 /* user dquot record */ #define XFS_DQTYPE_PROJ 0x02 /* project dquot record */ #define XFS_DQTYPE_GROUP 0x04 /* group dquot record */ +#define XFS_DQTYPE_BIGTIME 0x80 /* large expiry timestamps */ /* bitmask to determine if this is a user/group/project dquot */ #define XFS_DQTYPE_REC_MASK (XFS_DQTYPE_USER | \ XFS_DQTYPE_PROJ | \ XFS_DQTYPE_GROUP) -#define XFS_DQTYPE_ANY (XFS_DQTYPE_REC_MASK) +#define XFS_DQTYPE_ANY (XFS_DQTYPE_REC_MASK | \ + XFS_DQTYPE_BIGTIME) + +/* + * XFS Quota Timers + * ================ + * + * Traditional quota grace period expiration timers are an unsigned 32-bit + * seconds counter; time zero is the Unix epoch, Jan 1 00:00:01 UTC 1970. + * Note that an expiration value of zero means that the quota limit has not + * been reached, and therefore no expiration has been set. Therefore, the + * ondisk min and max defined here can be used directly to constrain the incore + * quota expiration timestamps on a Unix system. + * + * When bigtime is enabled, we trade two bits of precision to expand the + * expiration timeout range to match that of big inode timestamps. The min and + * max recorded here are the on-disk limits, not a Unix timestamp. + * + * The grace period for each quota type is stored in the root dquot (id = 0) + * and is applied to a non-root dquot when it exceeds the soft or hard limits. + * The length of quota grace periods are unsigned 32-bit quantities measured in + * units of seconds. A value of zero means to use the default period. + */ + +/* + * Smallest possible ondisk quota expiration value with traditional timestamps. + * This corresponds exactly with the incore expiration Jan 1 00:00:01 UTC 1970. + */ +#define XFS_DQ_LEGACY_EXPIRY_MIN ((int64_t)1) + +/* + * Largest possible ondisk quota expiration value with traditional timestamps. + * This corresponds exactly with the incore expiration Feb 7 06:28:15 UTC 2106. + */ +#define XFS_DQ_LEGACY_EXPIRY_MAX ((int64_t)U32_MAX) + +/* + * Smallest possible ondisk quota expiration value with bigtime timestamps. + * This corresponds (after conversion to a Unix timestamp) with the incore + * expiration of Jan 1 00:00:04 UTC 1970. + */ +#define XFS_DQ_BIGTIME_EXPIRY_MIN (XFS_DQ_LEGACY_EXPIRY_MIN) + +/* + * Largest supported ondisk quota expiration value with bigtime timestamps. + * This corresponds (after conversion to a Unix timestamp) with an incore + * expiration of Jul 2 20:20:24 UTC 2486. + * + * The ondisk field supports values up to -1U, which corresponds to an incore + * expiration in 2514. This is beyond the maximum the bigtime inode timestamp, + * so we cap the maximum bigtime quota expiration to the max inode timestamp. + */ +#define XFS_DQ_BIGTIME_EXPIRY_MAX ((int64_t)4074815106U) + +/* + * The following conversion factors assist in converting a quota expiration + * timestamp between the incore and ondisk formats. + */ +#define XFS_DQ_BIGTIME_SHIFT (2) +#define XFS_DQ_BIGTIME_SLACK ((int64_t)(1ULL << XFS_DQ_BIGTIME_SHIFT) - 1) + +/* Convert an incore quota expiration timestamp to an ondisk bigtime value. */ +static inline uint32_t xfs_dq_unix_to_bigtime(time64_t unix_seconds) +{ + /* + * Round the expiration timestamp up to the nearest bigtime timestamp + * that we can store, to give users the most time to fix problems. + */ + return ((uint64_t)unix_seconds + XFS_DQ_BIGTIME_SLACK) >> + XFS_DQ_BIGTIME_SHIFT; +} + +/* Convert an ondisk bigtime quota expiration value to an incore timestamp. */ +static inline time64_t xfs_dq_bigtime_to_unix(uint32_t ondisk_seconds) +{ + return (time64_t)ondisk_seconds << XFS_DQ_BIGTIME_SHIFT; +} + +/* + * Default quota grace periods, ranging from zero (use the compiled defaults) + * to ~136 years. These are applied to a non-root dquot that has exceeded + * either limit. + */ +#define XFS_DQ_GRACE_MIN ((int64_t)0) +#define XFS_DQ_GRACE_MAX ((int64_t)U32_MAX) /* * This is the main portion of the on-disk representation of quota information diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 84bcffa87753..2a2e3cfd94f0 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -249,6 +249,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_SPINODES (1 << 18) /* sparse inode chunks */ #define XFS_FSOP_GEOM_FLAGS_RMAPBT (1 << 19) /* reverse mapping btree */ #define XFS_FSOP_GEOM_FLAGS_REFLINK (1 << 20) /* files can share blocks */ +#define XFS_FSOP_GEOM_FLAGS_BIGTIME (1 << 21) /* 64-bit nsec timestamps */ /* * Minimum and maximum sizes need for growth checks. diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index a6b37db55169..974e71bc4a3a 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2473,6 +2473,7 @@ xfs_ialloc_log_agi( offsetof(xfs_agi_t, agi_unlinked), offsetof(xfs_agi_t, agi_free_root), offsetof(xfs_agi_t, agi_free_level), + offsetof(xfs_agi_t, agi_iblocks), sizeof(xfs_agi_t) }; #ifdef DEBUG @@ -2806,6 +2807,10 @@ xfs_ialloc_setup_geometry( uint64_t icount; uint inodes; + igeo->new_diflags2 = 0; + if (xfs_sb_version_hasbigtime(&mp->m_sb)) + igeo->new_diflags2 |= XFS_DIFLAG2_BIGTIME; + /* Compute inode btree geometry. */ igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog; igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 3c8aebc36e64..cc919a2ee870 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -67,6 +67,25 @@ xfs_finobt_set_root( XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL); } +/* Update the inode btree block counter for this btree. */ +static inline void +xfs_inobt_mod_blockcount( + struct xfs_btree_cur *cur, + int howmuch) +{ + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agi *agi = agbp->b_addr; + + if (!xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) + return; + + if (cur->bc_btnum == XFS_BTNUM_FINO) + be32_add_cpu(&agi->agi_fblocks, howmuch); + else if (cur->bc_btnum == XFS_BTNUM_INO) + be32_add_cpu(&agi->agi_iblocks, howmuch); + xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_IBLOCKS); +} + STATIC int __xfs_inobt_alloc_block( struct xfs_btree_cur *cur, @@ -102,6 +121,7 @@ __xfs_inobt_alloc_block( new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno)); *stat = 1; + xfs_inobt_mod_blockcount(cur, 1); return 0; } @@ -134,6 +154,7 @@ __xfs_inobt_free_block( struct xfs_buf *bp, enum xfs_ag_resv_type resv) { + xfs_inobt_mod_blockcount(cur, -1); return xfs_free_extent(cur->bc_tp, XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, &XFS_RMAP_OINFO_INOBT, resv); @@ -480,19 +501,29 @@ xfs_inobt_commit_staged_btree( { struct xfs_agi *agi = agbp->b_addr; struct xbtree_afakeroot *afake = cur->bc_ag.afake; + int fields; ASSERT(cur->bc_flags & XFS_BTREE_STAGING); if (cur->bc_btnum == XFS_BTNUM_INO) { + fields = XFS_AGI_ROOT | XFS_AGI_LEVEL; agi->agi_root = cpu_to_be32(afake->af_root); agi->agi_level = cpu_to_be32(afake->af_levels); - xfs_ialloc_log_agi(tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); + if (xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) { + agi->agi_iblocks = cpu_to_be32(afake->af_blocks); + fields |= XFS_AGI_IBLOCKS; + } + xfs_ialloc_log_agi(tp, agbp, fields); xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_inobt_ops); } else { + fields = XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL; agi->agi_free_root = cpu_to_be32(afake->af_root); agi->agi_free_level = cpu_to_be32(afake->af_levels); - xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREE_ROOT | - XFS_AGI_FREE_LEVEL); + if (xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) { + agi->agi_fblocks = cpu_to_be32(afake->af_blocks); + fields |= XFS_AGI_IBLOCKS; + } + xfs_ialloc_log_agi(tp, agbp, fields); xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_finobt_ops); } } @@ -673,6 +704,28 @@ xfs_inobt_count_blocks( return error; } +/* Read finobt block count from AGI header. */ +static int +xfs_finobt_read_blocks( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_extlen_t *tree_blocks) +{ + struct xfs_buf *agbp; + struct xfs_agi *agi; + int error; + + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) + return error; + + agi = agbp->b_addr; + *tree_blocks = be32_to_cpu(agi->agi_fblocks); + xfs_trans_brelse(tp, agbp); + return 0; +} + /* * Figure out how many blocks to reserve and how many are used by this btree. */ @@ -690,7 +743,11 @@ xfs_finobt_calc_reserves( if (!xfs_sb_version_hasfinobt(&mp->m_sb)) return 0; - error = xfs_inobt_count_blocks(mp, tp, agno, XFS_BTNUM_FINO, &tree_len); + if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) + error = xfs_finobt_read_blocks(mp, tp, agno, &tree_len); + else + error = xfs_inobt_count_blocks(mp, tp, agno, XFS_BTNUM_FINO, + &tree_len); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 52451809c478..b4164256993d 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -603,7 +603,7 @@ xfs_iext_realloc_root( if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF) new_size = NODE_SIZE; - new = kmem_realloc(ifp->if_u1.if_root, new_size, KM_NOFS); + new = krealloc(ifp->if_u1.if_root, new_size, GFP_NOFS | __GFP_NOFAIL); memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes); ifp->if_u1.if_root = new; cur->leaf = new; diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 8d5dd08eab75..c667c63f2cb0 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -157,6 +157,36 @@ xfs_imap_to_bp( return 0; } +static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts) +{ + struct timespec64 tv; + uint32_t n; + + tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n)); + tv.tv_nsec = n; + + return tv; +} + +/* Convert an ondisk timestamp to an incore timestamp. */ +struct timespec64 +xfs_inode_from_disk_ts( + struct xfs_dinode *dip, + const xfs_timestamp_t ts) +{ + struct timespec64 tv; + struct xfs_legacy_timestamp *lts; + + if (xfs_dinode_has_bigtime(dip)) + return xfs_inode_decode_bigtime(be64_to_cpu(ts)); + + lts = (struct xfs_legacy_timestamp *)&ts; + tv.tv_sec = (int)be32_to_cpu(lts->t_sec); + tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec); + + return tv; +} + int xfs_inode_from_disk( struct xfs_inode *ip, @@ -211,12 +241,9 @@ xfs_inode_from_disk( * a time before epoch is converted to a time long after epoch * on 64 bit systems. */ - inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec); - inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec); - inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec); - inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec); - inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec); - inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec); + inode->i_atime = xfs_inode_from_disk_ts(from, from->di_atime); + inode->i_mtime = xfs_inode_from_disk_ts(from, from->di_mtime); + inode->i_ctime = xfs_inode_from_disk_ts(from, from->di_ctime); to->di_size = be64_to_cpu(from->di_size); to->di_nblocks = be64_to_cpu(from->di_nblocks); @@ -229,8 +256,7 @@ xfs_inode_from_disk( if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { inode_set_iversion_queried(inode, be64_to_cpu(from->di_changecount)); - to->di_crtime.tv_sec = be32_to_cpu(from->di_crtime.t_sec); - to->di_crtime.tv_nsec = be32_to_cpu(from->di_crtime.t_nsec); + to->di_crtime = xfs_inode_from_disk_ts(from, from->di_crtime); to->di_flags2 = be64_to_cpu(from->di_flags2); to->di_cowextsize = be32_to_cpu(from->di_cowextsize); } @@ -252,6 +278,25 @@ out_destroy_data_fork: return error; } +/* Convert an incore timestamp to an ondisk timestamp. */ +static inline xfs_timestamp_t +xfs_inode_to_disk_ts( + struct xfs_inode *ip, + const struct timespec64 tv) +{ + struct xfs_legacy_timestamp *lts; + xfs_timestamp_t ts; + + if (xfs_inode_has_bigtime(ip)) + return cpu_to_be64(xfs_inode_encode_bigtime(tv)); + + lts = (struct xfs_legacy_timestamp *)&ts; + lts->t_sec = cpu_to_be32(tv.tv_sec); + lts->t_nsec = cpu_to_be32(tv.tv_nsec); + + return ts; +} + void xfs_inode_to_disk( struct xfs_inode *ip, @@ -271,12 +316,9 @@ xfs_inode_to_disk( to->di_projid_hi = cpu_to_be16(from->di_projid >> 16); memset(to->di_pad, 0, sizeof(to->di_pad)); - to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec); - to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec); - to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec); - to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec); - to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec); - to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec); + to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime); + to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime); + to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime); to->di_nlink = cpu_to_be32(inode->i_nlink); to->di_gen = cpu_to_be32(inode->i_generation); to->di_mode = cpu_to_be16(inode->i_mode); @@ -295,8 +337,7 @@ xfs_inode_to_disk( if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { to->di_version = 3; to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); - to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.tv_sec); - to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec); + to->di_crtime = xfs_inode_to_disk_ts(ip, from->di_crtime); to->di_flags2 = cpu_to_be64(from->di_flags2); to->di_cowextsize = cpu_to_be32(from->di_cowextsize); to->di_ino = cpu_to_be64(ip->i_ino); @@ -310,58 +351,6 @@ xfs_inode_to_disk( } } -void -xfs_log_dinode_to_disk( - struct xfs_log_dinode *from, - struct xfs_dinode *to) -{ - to->di_magic = cpu_to_be16(from->di_magic); - to->di_mode = cpu_to_be16(from->di_mode); - to->di_version = from->di_version; - to->di_format = from->di_format; - to->di_onlink = 0; - to->di_uid = cpu_to_be32(from->di_uid); - to->di_gid = cpu_to_be32(from->di_gid); - to->di_nlink = cpu_to_be32(from->di_nlink); - to->di_projid_lo = cpu_to_be16(from->di_projid_lo); - to->di_projid_hi = cpu_to_be16(from->di_projid_hi); - memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); - - to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); - to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); - to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); - to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec); - to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec); - to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec); - - to->di_size = cpu_to_be64(from->di_size); - to->di_nblocks = cpu_to_be64(from->di_nblocks); - to->di_extsize = cpu_to_be32(from->di_extsize); - to->di_nextents = cpu_to_be32(from->di_nextents); - to->di_anextents = cpu_to_be16(from->di_anextents); - to->di_forkoff = from->di_forkoff; - to->di_aformat = from->di_aformat; - to->di_dmevmask = cpu_to_be32(from->di_dmevmask); - to->di_dmstate = cpu_to_be16(from->di_dmstate); - to->di_flags = cpu_to_be16(from->di_flags); - to->di_gen = cpu_to_be32(from->di_gen); - - if (from->di_version == 3) { - to->di_changecount = cpu_to_be64(from->di_changecount); - to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); - to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); - to->di_flags2 = cpu_to_be64(from->di_flags2); - to->di_cowextsize = cpu_to_be32(from->di_cowextsize); - to->di_ino = cpu_to_be64(from->di_ino); - to->di_lsn = cpu_to_be64(from->di_lsn); - memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); - uuid_copy(&to->di_uuid, &from->di_uuid); - to->di_flushiter = 0; - } else { - to->di_flushiter = cpu_to_be16(from->di_flushiter); - } -} - static xfs_failaddr_t xfs_dinode_verify_fork( struct xfs_dinode *dip, @@ -568,6 +557,11 @@ xfs_dinode_verify( if (fa) return fa; + /* bigtime iflag can only happen on bigtime filesystems */ + if (xfs_dinode_has_bigtime(dip) && + !xfs_sb_version_hasbigtime(&mp->m_sb)) + return __this_address; + return NULL; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 6b08b9d060c2..536666143fe7 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -32,6 +32,11 @@ struct xfs_icdinode { struct timespec64 di_crtime; /* time created */ }; +static inline bool xfs_icdinode_has_bigtime(const struct xfs_icdinode *icd) +{ + return icd->di_flags2 & XFS_DIFLAG2_BIGTIME; +} + /* * Inode location information. Stored in the inode and passed to * xfs_imap_to_bp() to get a buffer and dinode for a given inode. @@ -49,8 +54,6 @@ void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *); void xfs_inode_to_disk(struct xfs_inode *ip, struct xfs_dinode *to, xfs_lsn_t lsn); int xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); -void xfs_log_dinode_to_disk(struct xfs_log_dinode *from, - struct xfs_dinode *to); xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, struct xfs_dinode *dip); @@ -60,4 +63,12 @@ xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp, uint32_t cowextsize, uint16_t mode, uint16_t flags, uint64_t flags2); +static inline uint64_t xfs_inode_encode_bigtime(struct timespec64 tv) +{ + return xfs_unix_to_bigtime(tv.tv_sec) * NSEC_PER_SEC + tv.tv_nsec; +} + +struct timespec64 xfs_inode_from_disk_ts(struct xfs_dinode *dip, + const xfs_timestamp_t ts); + #endif /* __XFS_INODE_BUF_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 0cf853d42d62..7575de5cecb1 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -386,8 +386,8 @@ xfs_iroot_realloc( cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); new_max = cur_max + rec_diff; new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); - ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, - KM_NOFS); + ifp->if_broot = krealloc(ifp->if_broot, new_size, + GFP_NOFS | __GFP_NOFAIL); op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, ifp->if_broot_bytes); np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, @@ -496,8 +496,8 @@ xfs_idata_realloc( * in size so that it can be logged and stay on word boundaries. * We enforce that here. */ - ifp->if_u1.if_data = kmem_realloc(ifp->if_u1.if_data, - roundup(new_size, 4), KM_NOFS); + ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, roundup(new_size, 4), + GFP_NOFS | __GFP_NOFAIL); ifp->if_bytes = new_size; } diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index e3400c9c71cd..8bd00da6d2a4 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -368,10 +368,13 @@ static inline int xfs_ilog_fdata(int w) * directly mirrors the xfs_dinode structure as it must contain all the same * information. */ -typedef struct xfs_ictimestamp { +typedef uint64_t xfs_ictimestamp_t; + +/* Legacy timestamp encoding format. */ +struct xfs_legacy_ictimestamp { int32_t t_sec; /* timestamp seconds */ int32_t t_nsec; /* timestamp nanoseconds */ -} xfs_ictimestamp_t; +}; /* * Define the format of the inode core that is logged. This structure must be diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index 641132d0e39d..3cca2bfe714c 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -121,7 +121,6 @@ struct xlog_recover { void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len, const struct xfs_buf_ops *ops); bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len); -void xlog_recover_iodone(struct xfs_buf *bp); void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type, uint64_t intent_id); diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index 076bdc7037ee..0f0af4e35032 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -23,7 +23,8 @@ typedef uint8_t xfs_dqtype_t; #define XFS_DQTYPE_STRINGS \ { XFS_DQTYPE_USER, "USER" }, \ { XFS_DQTYPE_PROJ, "PROJ" }, \ - { XFS_DQTYPE_GROUP, "GROUP" } + { XFS_DQTYPE_GROUP, "GROUP" }, \ + { XFS_DQTYPE_BIGTIME, "BIGTIME" } /* * flags for q_flags field in the dquot. @@ -143,4 +144,9 @@ extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); extern void xfs_dqblk_repair(struct xfs_mount *mp, struct xfs_dqblk *dqb, xfs_dqid_t id, xfs_dqtype_t type); +struct xfs_dquot; +time64_t xfs_dquot_from_disk_ts(struct xfs_disk_dquot *ddq, + __be32 dtimer); +__be32 xfs_dquot_to_disk_ts(struct xfs_dquot *ddq, time64_t timer); + #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index ae9aaf1f34bf..5aeafa59ed27 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -954,7 +954,7 @@ xfs_log_sb( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_buf *bp = xfs_trans_getsb(tp, mp); + struct xfs_buf *bp = xfs_trans_getsb(tp); mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); @@ -1084,7 +1084,7 @@ xfs_sync_sb_buf( if (error) return error; - bp = xfs_trans_getsb(tp, mp); + bp = xfs_trans_getsb(tp); xfs_log_sb(tp); xfs_trans_bhold(tp, bp); xfs_trans_set_sync(tp); @@ -1166,6 +1166,8 @@ xfs_fs_geometry( geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT; if (xfs_sb_version_hasreflink(sbp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK; + if (xfs_sb_version_hasbigtime(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME; if (xfs_sb_version_hassector(sbp)) geo->logsectsize = sbp->sb_logsectsize; else diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 708feb8eac76..c795ae47b3c9 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -176,6 +176,9 @@ struct xfs_ino_geometry { unsigned int ialloc_align; unsigned int agino_log; /* #bits for agino in inum */ + + /* precomputed value for di_flags2 */ + uint64_t new_diflags2; }; #endif /* __XFS_SHARED_H__ */ diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index b7e222befb08..90f1d5645052 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -131,6 +131,17 @@ xfs_trans_log_inode( iversion_flags = XFS_ILOG_CORE; } + /* + * If we're updating the inode core or the timestamps and it's possible + * to upgrade this inode to bigtime format, do so now. + */ + if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) && + xfs_sb_version_hasbigtime(&ip->i_mount->m_sb) && + !xfs_inode_has_bigtime(ip)) { + ip->i_d.di_flags2 |= XFS_DIFLAG2_BIGTIME; + flags |= XFS_ILOG_CORE; + } + /* * Record the specific change for fdatasync optimisation. This allows * fdatasync to skip log forces for inodes that are only timestamp @@ -177,9 +188,9 @@ xfs_trans_log_inode( /* * Always OR in the bits from the ili_last_fields field. This is to - * coordinate with the xfs_iflush() and xfs_iflush_done() routines in - * the eventual clearing of the ili_fields bits. See the big comment in - * xfs_iflush() for an explanation of this coordination mechanism. + * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines + * in the eventual clearing of the ili_fields bits. See the big comment + * in xfs_iflush() for an explanation of this coordination mechanism. */ iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags); spin_unlock(&iip->ili_lock); diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index e9bcf1faa183..ae8e2e0ac64a 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -781,6 +781,35 @@ xchk_agi_xref_icounts( xchk_block_xref_set_corrupt(sc, sc->sa.agi_bp); } +/* Check agi_[fi]blocks against tree size */ +static inline void +xchk_agi_xref_fiblocks( + struct xfs_scrub *sc) +{ + struct xfs_agi *agi = sc->sa.agi_bp->b_addr; + xfs_agblock_t blocks; + int error = 0; + + if (!xfs_sb_version_hasinobtcounts(&sc->mp->m_sb)) + return; + + if (sc->sa.ino_cur) { + error = xfs_btree_count_blocks(sc->sa.ino_cur, &blocks); + if (!xchk_should_check_xref(sc, &error, &sc->sa.ino_cur)) + return; + if (blocks != be32_to_cpu(agi->agi_iblocks)) + xchk_block_xref_set_corrupt(sc, sc->sa.agi_bp); + } + + if (sc->sa.fino_cur) { + error = xfs_btree_count_blocks(sc->sa.fino_cur, &blocks); + if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur)) + return; + if (blocks != be32_to_cpu(agi->agi_fblocks)) + xchk_block_xref_set_corrupt(sc, sc->sa.agi_bp); + } +} + /* Cross-reference with the other btrees. */ STATIC void xchk_agi_xref( @@ -804,6 +833,7 @@ xchk_agi_xref( xchk_agi_xref_icounts(sc); xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_FS); xchk_xref_is_not_shared(sc, agbno, 1); + xchk_agi_xref_fiblocks(sc); /* scrub teardown will take care of sc->sa for us */ } diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index bca2ab1d4be9..401f71579ce6 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -810,10 +810,34 @@ xrep_agi_calc_from_btrees( error = xfs_ialloc_count_inodes(cur, &count, &freecount); if (error) goto err; + if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) { + xfs_agblock_t blocks; + + error = xfs_btree_count_blocks(cur, &blocks); + if (error) + goto err; + agi->agi_iblocks = cpu_to_be32(blocks); + } xfs_btree_del_cursor(cur, error); agi->agi_count = cpu_to_be32(count); agi->agi_freecount = cpu_to_be32(freecount); + + if (xfs_sb_version_hasfinobt(&mp->m_sb) && + xfs_sb_version_hasinobtcounts(&mp->m_sb)) { + xfs_agblock_t blocks; + + cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, sc->sa.agno, + XFS_BTNUM_FINO); + if (error) + goto err; + error = xfs_btree_count_blocks(cur, &blocks); + if (error) + goto err; + xfs_btree_del_cursor(cur, error); + agi->agi_fblocks = cpu_to_be32(blocks); + } + return 0; err: xfs_btree_del_cursor(cur, error); diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 6d483ab29e63..3aa85b64de36 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -190,11 +190,30 @@ xchk_inode_flags2( if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) goto bad; + /* no bigtime iflag without the bigtime feature */ + if (xfs_dinode_has_bigtime(dip) && + !xfs_sb_version_hasbigtime(&mp->m_sb)) + goto bad; + return; bad: xchk_ino_set_corrupt(sc, ino); } +static inline void +xchk_dinode_nsec( + struct xfs_scrub *sc, + xfs_ino_t ino, + struct xfs_dinode *dip, + const xfs_timestamp_t ts) +{ + struct timespec64 tv; + + tv = xfs_inode_from_disk_ts(dip, ts); + if (tv.tv_nsec < 0 || tv.tv_nsec >= NSEC_PER_SEC) + xchk_ino_set_corrupt(sc, ino); +} + /* Scrub all the ondisk inode fields. */ STATIC void xchk_dinode( @@ -293,12 +312,9 @@ xchk_dinode( } /* di_[amc]time.nsec */ - if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC) - xchk_ino_set_corrupt(sc, ino); - if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC) - xchk_ino_set_corrupt(sc, ino); - if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC) - xchk_ino_set_corrupt(sc, ino); + xchk_dinode_nsec(sc, ino, dip, dip->di_atime); + xchk_dinode_nsec(sc, ino, dip, dip->di_mtime); + xchk_dinode_nsec(sc, ino, dip, dip->di_ctime); /* * di_size. xfs_dinode_verify checks for things that screw up @@ -403,8 +419,7 @@ xchk_dinode( } if (dip->di_version >= 3) { - if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC) - xchk_ino_set_corrupt(sc, ino); + xchk_dinode_nsec(sc, ino, dip, dip->di_crtime); xchk_inode_flags2(sc, dip, ino, mode, flags, flags2); xchk_inode_cowextsize(sc, dip, ino, mode, flags, flags2); diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c index 5641ae512c9e..c08be5ede066 100644 --- a/fs/xfs/scrub/symlink.c +++ b/fs/xfs/scrub/symlink.c @@ -22,7 +22,7 @@ xchk_setup_symlink( struct xfs_inode *ip) { /* Allocate the buffer without the inode lock held. */ - sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, 0); + sc->buf = kvzalloc(XFS_SYMLINK_MAXLEN + 1, GFP_KERNEL); if (!sc->buf) return -ENOMEM; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index d4c687b5cd06..c544951a0c07 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -192,7 +192,7 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (acl) { args.valuelen = XFS_ACL_SIZE(acl->a_count); - args.value = kmem_zalloc_large(args.valuelen, 0); + args.value = kvzalloc(args.valuelen, GFP_KERNEL); if (!args.value) return -ENOMEM; xfs_acl_to_disk(args.value, acl); diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 50f922cad91a..8f8837fe21cf 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -61,7 +61,7 @@ xfs_attr_shortform_list( int error = 0; ASSERT(dp->i_afp != NULL); - sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data; + sf = (struct xfs_attr_shortform *)dp->i_afp->if_u1.if_data; ASSERT(sf != NULL); if (!sf->hdr.count) return 0; @@ -96,7 +96,7 @@ xfs_attr_shortform_list( */ if (context->seen_enough) break; - sfe = XFS_ATTR_SF_NEXTENTRY(sfe); + sfe = xfs_attr_sf_nextentry(sfe); } trace_xfs_attr_list_sf_all(context); return 0; @@ -136,7 +136,7 @@ xfs_attr_shortform_list( /* These are bytes, and both on-disk, don't endian-flip */ sbp->valuelen = sfe->valuelen; sbp->flags = sfe->flags; - sfe = XFS_ATTR_SF_NEXTENTRY(sfe); + sfe = xfs_attr_sf_nextentry(sfe); sbp++; nsbuf++; } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 5123f82f2477..f2a8a0e75e1f 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -946,6 +946,14 @@ xfs_free_file_space( startoffset_fsb = XFS_B_TO_FSB(mp, offset); endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); + /* We can only free complete realtime extents. */ + if (XFS_IS_REALTIME_INODE(ip)) { + xfs_extlen_t extsz = xfs_get_extsz_hint(ip); + + if ((startoffset_fsb | endoffset_fsb) & (extsz - 1)) + return -EINVAL; + } + /* * Need to zero the stuff we're not freeing, on disk. */ @@ -1139,6 +1147,14 @@ xfs_insert_file_space( trace_xfs_insert_file_space(ip); + /* We can only insert complete realtime extents. */ + if (XFS_IS_REALTIME_INODE(ip)) { + xfs_extlen_t extsz = xfs_get_extsz_hint(ip); + + if ((stop_fsb | shift_fsb) & (extsz - 1)) + return -EINVAL; + } + error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb); if (error) return error; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d4cdcb6fb2fe..4e4cf91f4f9f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -52,6 +52,15 @@ static kmem_zone_t *xfs_buf_zone; * b_lock (trylock due to inversion) */ +static int __xfs_buf_submit(struct xfs_buf *bp, bool wait); + +static inline int +xfs_buf_submit( + struct xfs_buf *bp) +{ + return __xfs_buf_submit(bp, !(bp->b_flags & XBF_ASYNC)); +} + static inline int xfs_buf_is_vmapped( struct xfs_buf *bp) @@ -751,7 +760,7 @@ found: return 0; } -STATIC int +int _xfs_buf_read( xfs_buf_t *bp, xfs_buf_flags_t flags) @@ -759,7 +768,7 @@ _xfs_buf_read( ASSERT(!(flags & XBF_WRITE)); ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL); - bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); + bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE); bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); return xfs_buf_submit(bp); @@ -1170,20 +1179,145 @@ xfs_buf_wait_unpin( set_current_state(TASK_RUNNING); } -/* - * Buffer Utility Routines - */ +static void +xfs_buf_ioerror_alert_ratelimited( + struct xfs_buf *bp) +{ + static unsigned long lasttime; + static struct xfs_buftarg *lasttarg; -void + if (bp->b_target != lasttarg || + time_after(jiffies, (lasttime + 5*HZ))) { + lasttime = jiffies; + xfs_buf_ioerror_alert(bp, __this_address); + } + lasttarg = bp->b_target; +} + +/* + * Account for this latest trip around the retry handler, and decide if + * we've failed enough times to constitute a permanent failure. + */ +static bool +xfs_buf_ioerror_permanent( + struct xfs_buf *bp, + struct xfs_error_cfg *cfg) +{ + struct xfs_mount *mp = bp->b_mount; + + if (cfg->max_retries != XFS_ERR_RETRY_FOREVER && + ++bp->b_retries > cfg->max_retries) + return true; + if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER && + time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time)) + return true; + + /* At unmount we may treat errors differently */ + if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount) + return true; + + return false; +} + +/* + * On a sync write or shutdown we just want to stale the buffer and let the + * caller handle the error in bp->b_error appropriately. + * + * If the write was asynchronous then no one will be looking for the error. If + * this is the first failure of this type, clear the error state and write the + * buffer out again. This means we always retry an async write failure at least + * once, but we also need to set the buffer up to behave correctly now for + * repeated failures. + * + * If we get repeated async write failures, then we take action according to the + * error configuration we have been set up to use. + * + * Returns true if this function took care of error handling and the caller must + * not touch the buffer again. Return false if the caller should proceed with + * normal I/O completion handling. + */ +static bool +xfs_buf_ioend_handle_error( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_mount; + struct xfs_error_cfg *cfg; + + /* + * If we've already decided to shutdown the filesystem because of I/O + * errors, there's no point in giving this a retry. + */ + if (XFS_FORCED_SHUTDOWN(mp)) + goto out_stale; + + xfs_buf_ioerror_alert_ratelimited(bp); + + /* + * We're not going to bother about retrying this during recovery. + * One strike! + */ + if (bp->b_flags & _XBF_LOGRECOVERY) { + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + return false; + } + + /* + * Synchronous writes will have callers process the error. + */ + if (!(bp->b_flags & XBF_ASYNC)) + goto out_stale; + + trace_xfs_buf_iodone_async(bp, _RET_IP_); + + cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error); + if (bp->b_last_error != bp->b_error || + !(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL))) { + bp->b_last_error = bp->b_error; + if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER && + !bp->b_first_retry_time) + bp->b_first_retry_time = jiffies; + goto resubmit; + } + + /* + * Permanent error - we need to trigger a shutdown if we haven't already + * to indicate that inconsistency will result from this action. + */ + if (xfs_buf_ioerror_permanent(bp, cfg)) { + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + goto out_stale; + } + + /* Still considered a transient error. Caller will schedule retries. */ + if (bp->b_flags & _XBF_INODES) + xfs_buf_inode_io_fail(bp); + else if (bp->b_flags & _XBF_DQUOTS) + xfs_buf_dquot_io_fail(bp); + else + ASSERT(list_empty(&bp->b_li_list)); + xfs_buf_ioerror(bp, 0); + xfs_buf_relse(bp); + return true; + +resubmit: + xfs_buf_ioerror(bp, 0); + bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL); + xfs_buf_submit(bp); + return true; +out_stale: + xfs_buf_stale(bp); + bp->b_flags |= XBF_DONE; + bp->b_flags &= ~XBF_WRITE; + trace_xfs_buf_error_relse(bp, _RET_IP_); + return false; +} + +static void xfs_buf_ioend( struct xfs_buf *bp) { - bool read = bp->b_flags & XBF_READ; - trace_xfs_buf_iodone(bp, _RET_IP_); - bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); - /* * Pull in IO completion errors now. We are guaranteed to be running * single threaded, so we don't need the lock to read b_io_error. @@ -1191,39 +1325,47 @@ xfs_buf_ioend( if (!bp->b_error && bp->b_io_error) xfs_buf_ioerror(bp, bp->b_io_error); - if (read) { + if (bp->b_flags & XBF_READ) { if (!bp->b_error && bp->b_ops) bp->b_ops->verify_read(bp); if (!bp->b_error) bp->b_flags |= XBF_DONE; - xfs_buf_ioend_finish(bp); - return; + } else { + if (!bp->b_error) { + bp->b_flags &= ~XBF_WRITE_FAIL; + bp->b_flags |= XBF_DONE; + } + + if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp)) + return; + + /* clear the retry state */ + bp->b_last_error = 0; + bp->b_retries = 0; + bp->b_first_retry_time = 0; + + /* + * Note that for things like remote attribute buffers, there may + * not be a buffer log item here, so processing the buffer log + * item must remain optional. + */ + if (bp->b_log_item) + xfs_buf_item_done(bp); + + if (bp->b_flags & _XBF_INODES) + xfs_buf_inode_iodone(bp); + else if (bp->b_flags & _XBF_DQUOTS) + xfs_buf_dquot_iodone(bp); + } - if (!bp->b_error) { - bp->b_flags &= ~XBF_WRITE_FAIL; - bp->b_flags |= XBF_DONE; - } + bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD | + _XBF_LOGRECOVERY); - /* - * If this is a log recovery buffer, we aren't doing transactional IO - * yet so we need to let it handle IO completions. - */ - if (bp->b_flags & _XBF_LOGRECOVERY) { - xlog_recover_iodone(bp); - return; - } - - if (bp->b_flags & _XBF_INODES) { - xfs_buf_inode_iodone(bp); - return; - } - - if (bp->b_flags & _XBF_DQUOTS) { - xfs_buf_dquot_iodone(bp); - return; - } - xfs_buf_iodone(bp); + if (bp->b_flags & XBF_ASYNC) + xfs_buf_relse(bp); + else + complete(&bp->b_iowait); } static void @@ -1506,7 +1648,7 @@ xfs_buf_iowait( * safe to reference the buffer after a call to this function unless the caller * holds an additional reference itself. */ -int +static int __xfs_buf_submit( struct xfs_buf *bp, bool wait) diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 755b652e695a..bfd2907e7bc4 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -249,6 +249,7 @@ int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int flags, int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, size_t numblks, int flags, struct xfs_buf **bpp, const struct xfs_buf_ops *ops); +int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags); void xfs_buf_hold(struct xfs_buf *bp); /* Releasing Buffers */ @@ -269,28 +270,12 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) /* Buffer Read and Write Routines */ extern int xfs_bwrite(struct xfs_buf *bp); -extern void xfs_buf_ioend(struct xfs_buf *bp); -static inline void xfs_buf_ioend_finish(struct xfs_buf *bp) -{ - if (bp->b_flags & XBF_ASYNC) - xfs_buf_relse(bp); - else - complete(&bp->b_iowait); -} extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error, xfs_failaddr_t failaddr); #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address) extern void xfs_buf_ioerror_alert(struct xfs_buf *bp, xfs_failaddr_t fa); void xfs_buf_ioend_fail(struct xfs_buf *); - -extern int __xfs_buf_submit(struct xfs_buf *bp, bool); -static inline int xfs_buf_submit(struct xfs_buf *bp) -{ - bool wait = bp->b_flags & XBF_ASYNC ? false : true; - return __xfs_buf_submit(bp, wait); -} - void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize); void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa); #define xfs_buf_mark_corrupt(bp) __xfs_buf_mark_corrupt((bp), __this_address) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 408d1b572d3f..0356f2e340a1 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -30,8 +30,6 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) return container_of(lip, struct xfs_buf_log_item, bli_item); } -static void xfs_buf_item_done(struct xfs_buf *bp); - /* Is this log iovec plausibly large enough to contain the buffer log format? */ bool xfs_buf_log_check_iovec( @@ -463,7 +461,7 @@ xfs_buf_item_unpin( */ if (bip->bli_flags & XFS_BLI_STALE_INODE) { xfs_buf_item_done(bp); - xfs_iflush_done(bp); + xfs_buf_inode_iodone(bp); ASSERT(list_empty(&bp->b_li_list)); } else { xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); @@ -956,153 +954,10 @@ xfs_buf_item_relse( xfs_buf_item_free(bip); } -/* - * Decide if we're going to retry the write after a failure, and prepare - * the buffer for retrying the write. - */ -static bool -xfs_buf_ioerror_fail_without_retry( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_mount; - static ulong lasttime; - static xfs_buftarg_t *lasttarg; - - /* - * If we've already decided to shutdown the filesystem because of - * I/O errors, there's no point in giving this a retry. - */ - if (XFS_FORCED_SHUTDOWN(mp)) - return true; - - if (bp->b_target != lasttarg || - time_after(jiffies, (lasttime + 5*HZ))) { - lasttime = jiffies; - xfs_buf_ioerror_alert(bp, __this_address); - } - lasttarg = bp->b_target; - - /* synchronous writes will have callers process the error */ - if (!(bp->b_flags & XBF_ASYNC)) - return true; - return false; -} - -static bool -xfs_buf_ioerror_retry( - struct xfs_buf *bp, - struct xfs_error_cfg *cfg) -{ - if ((bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) && - bp->b_last_error == bp->b_error) - return false; - - bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL); - bp->b_last_error = bp->b_error; - if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER && - !bp->b_first_retry_time) - bp->b_first_retry_time = jiffies; - return true; -} - -/* - * Account for this latest trip around the retry handler, and decide if - * we've failed enough times to constitute a permanent failure. - */ -static bool -xfs_buf_ioerror_permanent( - struct xfs_buf *bp, - struct xfs_error_cfg *cfg) -{ - struct xfs_mount *mp = bp->b_mount; - - if (cfg->max_retries != XFS_ERR_RETRY_FOREVER && - ++bp->b_retries > cfg->max_retries) - return true; - if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER && - time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time)) - return true; - - /* At unmount we may treat errors differently */ - if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount) - return true; - - return false; -} - -/* - * On a sync write or shutdown we just want to stale the buffer and let the - * caller handle the error in bp->b_error appropriately. - * - * If the write was asynchronous then no one will be looking for the error. If - * this is the first failure of this type, clear the error state and write the - * buffer out again. This means we always retry an async write failure at least - * once, but we also need to set the buffer up to behave correctly now for - * repeated failures. - * - * If we get repeated async write failures, then we take action according to the - * error configuration we have been set up to use. - * - * Multi-state return value: - * - * XBF_IOERROR_FINISH: clear IO error retry state and run callback completions - * XBF_IOERROR_DONE: resubmitted immediately, do not run any completions - * XBF_IOERROR_FAIL: transient error, run failure callback completions and then - * release the buffer - */ -enum { - XBF_IOERROR_FINISH, - XBF_IOERROR_DONE, - XBF_IOERROR_FAIL, -}; - -static int -xfs_buf_iodone_error( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_mount; - struct xfs_error_cfg *cfg; - - if (xfs_buf_ioerror_fail_without_retry(bp)) - goto out_stale; - - trace_xfs_buf_item_iodone_async(bp, _RET_IP_); - - cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error); - if (xfs_buf_ioerror_retry(bp, cfg)) { - xfs_buf_ioerror(bp, 0); - xfs_buf_submit(bp); - return XBF_IOERROR_DONE; - } - - /* - * Permanent error - we need to trigger a shutdown if we haven't already - * to indicate that inconsistency will result from this action. - */ - if (xfs_buf_ioerror_permanent(bp, cfg)) { - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); - goto out_stale; - } - - /* Still considered a transient error. Caller will schedule retries. */ - return XBF_IOERROR_FAIL; - -out_stale: - xfs_buf_stale(bp); - bp->b_flags |= XBF_DONE; - trace_xfs_buf_error_relse(bp, _RET_IP_); - return XBF_IOERROR_FINISH; -} - -static void +void xfs_buf_item_done( struct xfs_buf *bp) { - struct xfs_buf_log_item *bip = bp->b_log_item; - - if (!bip) - return; - /* * If we are forcibly shutting down, this may well be off the AIL * already. That's because we simulate the log-committed callbacks to @@ -1111,113 +966,12 @@ xfs_buf_item_done( * xfs_trans_ail_delete() takes care of these. * * Either way, AIL is useless if we're forcing a shutdown. + * + * Note that log recovery writes might have buffer items that are not on + * the AIL even when the file system is not shut down. */ - xfs_trans_ail_delete(&bip->bli_item, SHUTDOWN_CORRUPT_INCORE); - bp->b_log_item = NULL; - xfs_buf_item_free(bip); - xfs_buf_rele(bp); -} - -static inline void -xfs_buf_clear_ioerror_retry_state( - struct xfs_buf *bp) -{ - bp->b_last_error = 0; - bp->b_retries = 0; - bp->b_first_retry_time = 0; -} - -/* - * Inode buffer iodone callback function. - */ -void -xfs_buf_inode_iodone( - struct xfs_buf *bp) -{ - if (bp->b_error) { - struct xfs_log_item *lip; - int ret = xfs_buf_iodone_error(bp); - - if (ret == XBF_IOERROR_FINISH) - goto finish_iodone; - if (ret == XBF_IOERROR_DONE) - return; - ASSERT(ret == XBF_IOERROR_FAIL); - list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { - set_bit(XFS_LI_FAILED, &lip->li_flags); - } - xfs_buf_ioerror(bp, 0); - xfs_buf_relse(bp); - return; - } - -finish_iodone: - xfs_buf_clear_ioerror_retry_state(bp); - xfs_buf_item_done(bp); - xfs_iflush_done(bp); - xfs_buf_ioend_finish(bp); -} - -/* - * Dquot buffer iodone callback function. - */ -void -xfs_buf_dquot_iodone( - struct xfs_buf *bp) -{ - if (bp->b_error) { - struct xfs_log_item *lip; - int ret = xfs_buf_iodone_error(bp); - - if (ret == XBF_IOERROR_FINISH) - goto finish_iodone; - if (ret == XBF_IOERROR_DONE) - return; - ASSERT(ret == XBF_IOERROR_FAIL); - spin_lock(&bp->b_mount->m_ail->ail_lock); - list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { - xfs_set_li_failed(lip, bp); - } - spin_unlock(&bp->b_mount->m_ail->ail_lock); - xfs_buf_ioerror(bp, 0); - xfs_buf_relse(bp); - return; - } - -finish_iodone: - xfs_buf_clear_ioerror_retry_state(bp); - /* a newly allocated dquot buffer might have a log item attached */ - xfs_buf_item_done(bp); - xfs_dquot_done(bp); - xfs_buf_ioend_finish(bp); -} - -/* - * Dirty buffer iodone callback function. - * - * Note that for things like remote attribute buffers, there may not be a buffer - * log item here, so processing the buffer log item must remain be optional. - */ -void -xfs_buf_iodone( - struct xfs_buf *bp) -{ - if (bp->b_error) { - int ret = xfs_buf_iodone_error(bp); - - if (ret == XBF_IOERROR_FINISH) - goto finish_iodone; - if (ret == XBF_IOERROR_DONE) - return; - ASSERT(ret == XBF_IOERROR_FAIL); - ASSERT(list_empty(&bp->b_li_list)); - xfs_buf_ioerror(bp, 0); - xfs_buf_relse(bp); - return; - } - -finish_iodone: - xfs_buf_clear_ioerror_retry_state(bp); - xfs_buf_item_done(bp); - xfs_buf_ioend_finish(bp); + xfs_trans_ail_delete(&bp->b_log_item->bli_item, + (bp->b_flags & _XBF_LOGRECOVERY) ? 0 : + SHUTDOWN_CORRUPT_INCORE); + xfs_buf_item_relse(bp); } diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 23507cbb4c41..50aa0f5ef959 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -50,12 +50,24 @@ struct xfs_buf_log_item { }; int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); +void xfs_buf_item_done(struct xfs_buf *bp); void xfs_buf_item_relse(struct xfs_buf *); bool xfs_buf_item_put(struct xfs_buf_log_item *); void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); void xfs_buf_inode_iodone(struct xfs_buf *); +void xfs_buf_inode_io_fail(struct xfs_buf *bp); +#ifdef CONFIG_XFS_QUOTA void xfs_buf_dquot_iodone(struct xfs_buf *); +void xfs_buf_dquot_io_fail(struct xfs_buf *bp); +#else +static inline void xfs_buf_dquot_iodone(struct xfs_buf *bp) +{ +} +static inline void xfs_buf_dquot_io_fail(struct xfs_buf *bp) +{ +} +#endif /* CONFIG_XFS_QUOTA */ void xfs_buf_iodone(struct xfs_buf *); bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec); diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index 8f0457d67d77..24c7a8d11e1a 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -414,7 +414,7 @@ xlog_recover_validate_buf_type( * * Write verifiers update the metadata LSN from log items attached to * the buffer. Therefore, initialize a bli purely to carry the LSN to - * the verifier. We'll clean it up in our ->iodone() callback. + * the verifier. */ if (bp->b_ops) { struct xfs_buf_log_item *bip; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index bcd73b9c2994..3072814e407d 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -98,12 +98,33 @@ xfs_qm_adjust_dqlimits( xfs_dquot_set_prealloc_limits(dq); } +/* Set the expiration time of a quota's grace period. */ +time64_t +xfs_dquot_set_timeout( + struct xfs_mount *mp, + time64_t timeout) +{ + struct xfs_quotainfo *qi = mp->m_quotainfo; + + return clamp_t(time64_t, timeout, qi->qi_expiry_min, + qi->qi_expiry_max); +} + +/* Set the length of the default grace period. */ +time64_t +xfs_dquot_set_grace_period( + time64_t grace) +{ + return clamp_t(time64_t, grace, XFS_DQ_GRACE_MIN, XFS_DQ_GRACE_MAX); +} + /* * Determine if this quota counter is over either limit and set the quota * timers as appropriate. */ static inline void xfs_qm_adjust_res_timer( + struct xfs_mount *mp, struct xfs_dquot_res *res, struct xfs_quota_limits *qlim) { @@ -112,7 +133,8 @@ xfs_qm_adjust_res_timer( if ((res->softlimit && res->count > res->softlimit) || (res->hardlimit && res->count > res->hardlimit)) { if (res->timer == 0) - res->timer = ktime_get_real_seconds() + qlim->time; + res->timer = xfs_dquot_set_timeout(mp, + ktime_get_real_seconds() + qlim->time); } else { if (res->timer == 0) res->warnings = 0; @@ -145,9 +167,9 @@ xfs_qm_adjust_dqtimers( ASSERT(dq->q_id); defq = xfs_get_defquota(qi, xfs_dquot_type(dq)); - xfs_qm_adjust_res_timer(&dq->q_blk, &defq->blk); - xfs_qm_adjust_res_timer(&dq->q_ino, &defq->ino); - xfs_qm_adjust_res_timer(&dq->q_rtb, &defq->rtb); + xfs_qm_adjust_res_timer(dq->q_mount, &dq->q_blk, &defq->blk); + xfs_qm_adjust_res_timer(dq->q_mount, &dq->q_ino, &defq->ino); + xfs_qm_adjust_res_timer(dq->q_mount, &dq->q_rtb, &defq->rtb); } /* @@ -201,6 +223,8 @@ xfs_qm_init_dquot_blk( d->dd_diskdq.d_version = XFS_DQUOT_VERSION; d->dd_diskdq.d_id = cpu_to_be32(curid); d->dd_diskdq.d_type = type; + if (curid > 0 && xfs_sb_version_hasbigtime(&mp->m_sb)) + d->dd_diskdq.d_type |= XFS_DQTYPE_BIGTIME; if (xfs_sb_version_hascrc(&mp->m_sb)) { uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid); xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), @@ -514,9 +538,9 @@ xfs_dquot_from_disk( dqp->q_ino.warnings = be16_to_cpu(ddqp->d_iwarns); dqp->q_rtb.warnings = be16_to_cpu(ddqp->d_rtbwarns); - dqp->q_blk.timer = be32_to_cpu(ddqp->d_btimer); - dqp->q_ino.timer = be32_to_cpu(ddqp->d_itimer); - dqp->q_rtb.timer = be32_to_cpu(ddqp->d_rtbtimer); + dqp->q_blk.timer = xfs_dquot_from_disk_ts(ddqp, ddqp->d_btimer); + dqp->q_ino.timer = xfs_dquot_from_disk_ts(ddqp, ddqp->d_itimer); + dqp->q_rtb.timer = xfs_dquot_from_disk_ts(ddqp, ddqp->d_rtbtimer); /* * Reservation counters are defined as reservation plus current usage @@ -559,9 +583,9 @@ xfs_dquot_to_disk( ddqp->d_iwarns = cpu_to_be16(dqp->q_ino.warnings); ddqp->d_rtbwarns = cpu_to_be16(dqp->q_rtb.warnings); - ddqp->d_btimer = cpu_to_be32(dqp->q_blk.timer); - ddqp->d_itimer = cpu_to_be32(dqp->q_ino.timer); - ddqp->d_rtbtimer = cpu_to_be32(dqp->q_rtb.timer); + ddqp->d_btimer = xfs_dquot_to_disk_ts(dqp, dqp->q_blk.timer); + ddqp->d_itimer = xfs_dquot_to_disk_ts(dqp, dqp->q_ino.timer); + ddqp->d_rtbtimer = xfs_dquot_to_disk_ts(dqp, dqp->q_rtb.timer); } /* Allocate and initialize the dquot buffer for this in-core dquot. */ @@ -1107,7 +1131,7 @@ xfs_qm_dqflush_done( } void -xfs_dquot_done( +xfs_buf_dquot_iodone( struct xfs_buf *bp) { struct xfs_log_item *lip, *n; @@ -1118,6 +1142,18 @@ xfs_dquot_done( } } +void +xfs_buf_dquot_io_fail( + struct xfs_buf *bp) +{ + struct xfs_log_item *lip; + + spin_lock(&bp->b_mount->m_ail->ail_lock); + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) + xfs_set_li_failed(lip, bp); + spin_unlock(&bp->b_mount->m_ail->ail_lock); +} + /* Check incore dquot for errors before we flush. */ static xfs_failaddr_t xfs_qm_dqflush_check( @@ -1145,6 +1181,14 @@ xfs_qm_dqflush_check( !dqp->q_rtb.timer) return __this_address; + /* bigtime flag should never be set on root dquots */ + if (dqp->q_type & XFS_DQTYPE_BIGTIME) { + if (!xfs_sb_version_hasbigtime(&dqp->q_mount->m_sb)) + return __this_address; + if (dqp->q_id == 0) + return __this_address; + } + return NULL; } diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 282a65da93c7..f642884a6834 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -237,4 +237,7 @@ typedef int (*xfs_qm_dqiterate_fn)(struct xfs_dquot *dq, int xfs_qm_dqiterate(struct xfs_mount *mp, xfs_dqtype_t type, xfs_qm_dqiterate_fn iter_fn, void *priv); +time64_t xfs_dquot_set_timeout(struct xfs_mount *mp, time64_t timeout); +time64_t xfs_dquot_set_grace_period(time64_t grace); + #endif /* __XFS_DQUOT_H__ */ diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a29f78a663ca..3d1b95124744 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1008,6 +1008,21 @@ xfs_file_fadvise( return ret; } +/* Does this file, inode, or mount want synchronous writes? */ +static inline bool xfs_file_sync_writes(struct file *filp) +{ + struct xfs_inode *ip = XFS_I(file_inode(filp)); + + if (ip->i_mount->m_flags & XFS_MOUNT_WSYNC) + return true; + if (filp->f_flags & (__O_SYNC | O_DSYNC)) + return true; + if (IS_SYNC(file_inode(filp))) + return true; + + return false; +} + STATIC loff_t xfs_file_remap_range( struct file *file_in, @@ -1065,7 +1080,7 @@ xfs_file_remap_range( if (ret) goto out_unlock; - if (mp->m_flags & XFS_MOUNT_WSYNC) + if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out)) xfs_log_force_inode(dest); out_unlock: xfs_iunlock2_io_mmap(src, dest); diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 101028ebb571..deb99300d171 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -52,7 +52,6 @@ xfs_inode_alloc( XFS_STATS_INC(mp, vn_active); ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!xfs_isiflocked(ip)); ASSERT(ip->i_ino == 0); /* initialise the xfs inode */ @@ -123,7 +122,7 @@ void xfs_inode_free( struct xfs_inode *ip) { - ASSERT(!xfs_isiflocked(ip)); + ASSERT(!xfs_iflags_test(ip, XFS_IFLUSHING)); /* * Because we use RCU freeing we need to ensure the inode always @@ -1035,23 +1034,21 @@ xfs_reclaim_inode( if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) goto out; - if (!xfs_iflock_nowait(ip)) + if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING)) goto out_iunlock; if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { xfs_iunpin_wait(ip); - /* xfs_iflush_abort() drops the flush lock */ xfs_iflush_abort(ip); goto reclaim; } if (xfs_ipincount(ip)) - goto out_ifunlock; + goto out_clear_flush; if (!xfs_inode_clean(ip)) - goto out_ifunlock; + goto out_clear_flush; - xfs_ifunlock(ip); + xfs_iflags_clear(ip, XFS_IFLUSHING); reclaim: - ASSERT(!xfs_isiflocked(ip)); /* * Because we use RCU freeing we need to ensure the inode always appears @@ -1101,8 +1098,8 @@ reclaim: __xfs_inode_free(ip); return; -out_ifunlock: - xfs_ifunlock(ip); +out_clear_flush: + xfs_iflags_clear(ip, XFS_IFLUSHING); out_iunlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); out: @@ -1211,7 +1208,7 @@ xfs_reclaim_inodes( while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { xfs_ail_push_all_sync(mp->m_ail); xfs_reclaim_inodes_ag(mp, &nr_to_scan); - }; + } } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c06129cffba9..49624973eecc 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -598,22 +598,6 @@ xfs_lock_two_inodes( } } -void -__xfs_iflock( - struct xfs_inode *ip) -{ - wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT); - DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); - - do { - prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); - if (xfs_isiflocked(ip)) - io_schedule(); - } while (!xfs_iflock_nowait(ip)); - - finish_wait(wq, &wait.wq_entry); -} - STATIC uint _xfs_dic2xflags( uint16_t di_flags, @@ -840,7 +824,7 @@ xfs_ialloc( if (xfs_sb_version_has_v3inode(&mp->m_sb)) { inode_set_iversion(inode, 1); - ip->i_d.di_flags2 = 0; + ip->i_d.di_flags2 = mp->m_ino_geo.new_diflags2; ip->i_d.di_cowextsize = 0; ip->i_d.di_crtime = tv; } @@ -2531,11 +2515,8 @@ retry: * valid, the wrong inode or stale. */ spin_lock(&ip->i_flags_lock); - if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) { - spin_unlock(&ip->i_flags_lock); - rcu_read_unlock(); - return; - } + if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) + goto out_iflags_unlock; /* * Don't try to lock/unlock the current inode, but we _cannot_ skip the @@ -2552,16 +2533,14 @@ retry: } } ip->i_flags |= XFS_ISTALE; - spin_unlock(&ip->i_flags_lock); - rcu_read_unlock(); /* - * If we can't get the flush lock, the inode is already attached. All + * If the inode is flushing, it is already attached to the buffer. All * we needed to do here is mark the inode stale so buffer IO completion * will remove it from the AIL. */ iip = ip->i_itemp; - if (!xfs_iflock_nowait(ip)) { + if (__xfs_iflags_test(ip, XFS_IFLUSHING)) { ASSERT(!list_empty(&iip->ili_item.li_bio_list)); ASSERT(iip->ili_last_fields); goto out_iunlock; @@ -2573,10 +2552,12 @@ retry: * commit as the flock synchronises removal of the inode from the * cluster buffer against inode reclaim. */ - if (!iip || list_empty(&iip->ili_item.li_bio_list)) { - xfs_ifunlock(ip); + if (!iip || list_empty(&iip->ili_item.li_bio_list)) goto out_iunlock; - } + + __xfs_iflags_set(ip, XFS_IFLUSHING); + spin_unlock(&ip->i_flags_lock); + rcu_read_unlock(); /* we have a dirty inode in memory that has not yet been flushed. */ spin_lock(&iip->ili_lock); @@ -2586,9 +2567,16 @@ retry: spin_unlock(&iip->ili_lock); ASSERT(iip->ili_last_fields); + if (ip != free_ip) + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return; + out_iunlock: if (ip != free_ip) xfs_iunlock(ip, XFS_ILOCK_EXCL); +out_iflags_unlock: + spin_unlock(&ip->i_flags_lock); + rcu_read_unlock(); } /* @@ -2631,8 +2619,9 @@ xfs_ifree_cluster( /* * We obtain and lock the backing buffer first in the process - * here, as we have to ensure that any dirty inode that we - * can't get the flush lock on is attached to the buffer. + * here to ensure dirty inodes attached to the buffer remain in + * the flushing state while we mark them stale. + * * If we scan the in-memory inodes first, then buffer IO can * complete before we get a lock on it, and hence we may fail * to mark all the active inodes on the buffer stale. @@ -2717,7 +2706,7 @@ xfs_ifree( VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; - ip->i_d.di_flags2 = 0; + ip->i_d.di_flags2 = ip->i_mount->m_ino_geo.new_diflags2; ip->i_d.di_dmevmask = 0; ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; @@ -3443,7 +3432,7 @@ xfs_iflush( int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); - ASSERT(xfs_isiflocked(ip)); + ASSERT(xfs_iflags_test(ip, XFS_IFLUSHING)); ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE || ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); ASSERT(iip->ili_item.li_buf == bp); @@ -3553,8 +3542,8 @@ xfs_iflush( * * What we do is move the bits to the ili_last_fields field. When * logging the inode, these bits are moved back to the ili_fields field. - * In the xfs_iflush_done() routine we clear ili_last_fields, since we - * know that the information those bits represent is permanently on + * In the xfs_buf_inode_iodone() routine we clear ili_last_fields, since + * we know that the information those bits represent is permanently on * disk. As long as the flush completes before the inode is logged * again, then both ili_fields and ili_last_fields will be cleared. */ @@ -3568,7 +3557,7 @@ flush_out: /* * Store the current LSN of the inode so that we can tell whether the - * item has moved in the AIL from xfs_iflush_done(). + * item has moved in the AIL from xfs_buf_inode_iodone(). */ xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, &iip->ili_item.li_lsn); @@ -3613,7 +3602,7 @@ xfs_iflush_cluster( /* * Quick and dirty check to avoid locks if possible. */ - if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK)) + if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) continue; if (xfs_ipincount(ip)) continue; @@ -3627,7 +3616,7 @@ xfs_iflush_cluster( */ spin_lock(&ip->i_flags_lock); ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE)); - if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK)) { + if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) { spin_unlock(&ip->i_flags_lock); continue; } @@ -3635,23 +3624,16 @@ xfs_iflush_cluster( /* * ILOCK will pin the inode against reclaim and prevent * concurrent transactions modifying the inode while we are - * flushing the inode. + * flushing the inode. If we get the lock, set the flushing + * state before we drop the i_flags_lock. */ if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { spin_unlock(&ip->i_flags_lock); continue; } + __xfs_iflags_set(ip, XFS_IFLUSHING); spin_unlock(&ip->i_flags_lock); - /* - * Skip inodes that are already flush locked as they have - * already been written to the buffer. - */ - if (!xfs_iflock_nowait(ip)) { - xfs_iunlock(ip, XFS_ILOCK_SHARED); - continue; - } - /* * Abort flushing this inode if we are shut down because the * inode may not currently be in the AIL. This can occur when @@ -3661,7 +3643,6 @@ xfs_iflush_cluster( */ if (XFS_FORCED_SHUTDOWN(mp)) { xfs_iunpin_wait(ip); - /* xfs_iflush_abort() drops the flush lock */ xfs_iflush_abort(ip); xfs_iunlock(ip, XFS_ILOCK_SHARED); error = -EIO; @@ -3670,7 +3651,7 @@ xfs_iflush_cluster( /* don't block waiting on a log force to unpin dirty inodes */ if (xfs_ipincount(ip)) { - xfs_ifunlock(ip); + xfs_iflags_clear(ip, XFS_IFLUSHING); xfs_iunlock(ip, XFS_ILOCK_SHARED); continue; } @@ -3678,7 +3659,7 @@ xfs_iflush_cluster( if (!xfs_inode_clean(ip)) error = xfs_iflush(ip, bp); else - xfs_ifunlock(ip); + xfs_iflags_clear(ip, XFS_IFLUSHING); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (error) break; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index e9a8bb184d1f..751a3d1d7d84 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -194,6 +194,11 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip) return ip->i_cowfp && ip->i_cowfp->if_bytes; } +static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip) +{ + return ip->i_d.di_flags2 & XFS_DIFLAG2_BIGTIME; +} + /* * Return the buftarg used for data allocations on a given inode. */ @@ -211,8 +216,7 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip) #define XFS_INEW (1 << __XFS_INEW_BIT) #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ -#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ -#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT) +#define XFS_IFLUSHING (1 << 7) /* inode is being flushed */ #define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */ #define XFS_IPINNED (1 << __XFS_IPINNED_BIT) #define XFS_IEOFBLOCKS (1 << 9) /* has the preallocblocks tag set */ @@ -233,36 +237,6 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip) (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ XFS_IDIRTY_RELEASE | XFS_ITRUNCATED) -/* - * Synchronize processes attempting to flush the in-core inode back to disk. - */ - -static inline int xfs_isiflocked(struct xfs_inode *ip) -{ - return xfs_iflags_test(ip, XFS_IFLOCK); -} - -extern void __xfs_iflock(struct xfs_inode *ip); - -static inline int xfs_iflock_nowait(struct xfs_inode *ip) -{ - return !xfs_iflags_test_and_set(ip, XFS_IFLOCK); -} - -static inline void xfs_iflock(struct xfs_inode *ip) -{ - if (!xfs_iflock_nowait(ip)) - __xfs_iflock(ip); -} - -static inline void xfs_ifunlock(struct xfs_inode *ip) -{ - ASSERT(xfs_isiflocked(ip)); - xfs_iflags_clear(ip, XFS_IFLOCK); - smp_mb(); - wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); -} - /* * Flags for inode locking. * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 6c65938cee1c..17e20a6d8b4e 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -295,6 +295,28 @@ xfs_inode_item_format_attr_fork( } } +/* + * Convert an incore timestamp to a log timestamp. Note that the log format + * specifies host endian format! + */ +static inline xfs_ictimestamp_t +xfs_inode_to_log_dinode_ts( + struct xfs_inode *ip, + const struct timespec64 tv) +{ + struct xfs_legacy_ictimestamp *lits; + xfs_ictimestamp_t its; + + if (xfs_inode_has_bigtime(ip)) + return xfs_inode_encode_bigtime(tv); + + lits = (struct xfs_legacy_ictimestamp *)&its; + lits->t_sec = tv.tv_sec; + lits->t_nsec = tv.tv_nsec; + + return its; +} + static void xfs_inode_to_log_dinode( struct xfs_inode *ip, @@ -313,12 +335,9 @@ xfs_inode_to_log_dinode( memset(to->di_pad, 0, sizeof(to->di_pad)); memset(to->di_pad3, 0, sizeof(to->di_pad3)); - to->di_atime.t_sec = inode->i_atime.tv_sec; - to->di_atime.t_nsec = inode->i_atime.tv_nsec; - to->di_mtime.t_sec = inode->i_mtime.tv_sec; - to->di_mtime.t_nsec = inode->i_mtime.tv_nsec; - to->di_ctime.t_sec = inode->i_ctime.tv_sec; - to->di_ctime.t_nsec = inode->i_ctime.tv_nsec; + to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime); + to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime); + to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode->i_ctime); to->di_nlink = inode->i_nlink; to->di_gen = inode->i_generation; to->di_mode = inode->i_mode; @@ -340,8 +359,7 @@ xfs_inode_to_log_dinode( if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { to->di_version = 3; to->di_changecount = inode_peek_iversion(inode); - to->di_crtime.t_sec = from->di_crtime.tv_sec; - to->di_crtime.t_nsec = from->di_crtime.tv_nsec; + to->di_crtime = xfs_inode_to_log_dinode_ts(ip, from->di_crtime); to->di_flags2 = from->di_flags2; to->di_cowextsize = from->di_cowextsize; to->di_ino = ip->i_ino; @@ -491,8 +509,7 @@ xfs_inode_item_push( (ip->i_flags & XFS_ISTALE)) return XFS_ITEM_PINNED; - /* If the inode is already flush locked, we're already flushing. */ - if (xfs_isiflocked(ip)) + if (xfs_iflags_test(ip, XFS_IFLUSHING)) return XFS_ITEM_FLUSHING; if (!xfs_buf_trylock(bp)) @@ -703,7 +720,7 @@ xfs_iflush_finish( iip->ili_last_fields = 0; iip->ili_flush_lsn = 0; spin_unlock(&iip->ili_lock); - xfs_ifunlock(iip->ili_inode); + xfs_iflags_clear(iip->ili_inode, XFS_IFLUSHING); if (drop_buffer) xfs_buf_rele(bp); } @@ -711,11 +728,11 @@ xfs_iflush_finish( /* * Inode buffer IO completion routine. It is responsible for removing inodes - * attached to the buffer from the AIL if they have not been re-logged, as well - * as completing the flush and unlocking the inode. + * attached to the buffer from the AIL if they have not been re-logged and + * completing the inode flush. */ void -xfs_iflush_done( +xfs_buf_inode_iodone( struct xfs_buf *bp) { struct xfs_log_item *lip, *n; @@ -754,11 +771,21 @@ xfs_iflush_done( list_splice_tail(&flushed_inodes, &bp->b_li_list); } +void +xfs_buf_inode_io_fail( + struct xfs_buf *bp) +{ + struct xfs_log_item *lip; + + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) + set_bit(XFS_LI_FAILED, &lip->li_flags); +} + /* - * This is the inode flushing abort routine. It is called from xfs_iflush when + * This is the inode flushing abort routine. It is called when * the filesystem is shutting down to clean up the inode state. It is * responsible for removing the inode item from the AIL if it has not been - * re-logged, and unlocking the inode's flush lock. + * re-logged and clearing the inode's flush state. */ void xfs_iflush_abort( @@ -790,7 +817,7 @@ xfs_iflush_abort( list_del_init(&iip->ili_item.li_bio_list); spin_unlock(&iip->ili_lock); } - xfs_ifunlock(ip); + xfs_iflags_clear(ip, XFS_IFLUSHING); if (bp) xfs_buf_rele(bp); } diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 048b5e7dee90..4b926e32831c 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -25,8 +25,8 @@ struct xfs_inode_log_item { * * We need atomic changes between inode dirtying, inode flushing and * inode completion, but these all hold different combinations of - * ILOCK and iflock and hence we need some other method of serialising - * updates to the flush state. + * ILOCK and IFLUSHING and hence we need some other method of + * serialising updates to the flush state. */ spinlock_t ili_lock; /* flush state lock */ unsigned int ili_last_fields; /* fields when flushed */ @@ -43,7 +43,6 @@ static inline int xfs_inode_clean(struct xfs_inode *ip) extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); extern void xfs_inode_item_destroy(struct xfs_inode *); -extern void xfs_iflush_done(struct xfs_buf *); extern void xfs_iflush_abort(struct xfs_inode *); extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, struct xfs_inode_log_format *); diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 5e0d291835b3..cb44f7653f03 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -115,6 +115,82 @@ out_free_ip: return error; } +static inline bool xfs_log_dinode_has_bigtime(const struct xfs_log_dinode *ld) +{ + return ld->di_version >= 3 && + (ld->di_flags2 & XFS_DIFLAG2_BIGTIME); +} + +/* Convert a log timestamp to an ondisk timestamp. */ +static inline xfs_timestamp_t +xfs_log_dinode_to_disk_ts( + struct xfs_log_dinode *from, + const xfs_ictimestamp_t its) +{ + struct xfs_legacy_timestamp *lts; + struct xfs_legacy_ictimestamp *lits; + xfs_timestamp_t ts; + + if (xfs_log_dinode_has_bigtime(from)) + return cpu_to_be64(its); + + lts = (struct xfs_legacy_timestamp *)&ts; + lits = (struct xfs_legacy_ictimestamp *)&its; + lts->t_sec = cpu_to_be32(lits->t_sec); + lts->t_nsec = cpu_to_be32(lits->t_nsec); + + return ts; +} + +STATIC void +xfs_log_dinode_to_disk( + struct xfs_log_dinode *from, + struct xfs_dinode *to) +{ + to->di_magic = cpu_to_be16(from->di_magic); + to->di_mode = cpu_to_be16(from->di_mode); + to->di_version = from->di_version; + to->di_format = from->di_format; + to->di_onlink = 0; + to->di_uid = cpu_to_be32(from->di_uid); + to->di_gid = cpu_to_be32(from->di_gid); + to->di_nlink = cpu_to_be32(from->di_nlink); + to->di_projid_lo = cpu_to_be16(from->di_projid_lo); + to->di_projid_hi = cpu_to_be16(from->di_projid_hi); + memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); + + to->di_atime = xfs_log_dinode_to_disk_ts(from, from->di_atime); + to->di_mtime = xfs_log_dinode_to_disk_ts(from, from->di_mtime); + to->di_ctime = xfs_log_dinode_to_disk_ts(from, from->di_ctime); + + to->di_size = cpu_to_be64(from->di_size); + to->di_nblocks = cpu_to_be64(from->di_nblocks); + to->di_extsize = cpu_to_be32(from->di_extsize); + to->di_nextents = cpu_to_be32(from->di_nextents); + to->di_anextents = cpu_to_be16(from->di_anextents); + to->di_forkoff = from->di_forkoff; + to->di_aformat = from->di_aformat; + to->di_dmevmask = cpu_to_be32(from->di_dmevmask); + to->di_dmstate = cpu_to_be16(from->di_dmstate); + to->di_flags = cpu_to_be16(from->di_flags); + to->di_gen = cpu_to_be32(from->di_gen); + + if (from->di_version == 3) { + to->di_changecount = cpu_to_be64(from->di_changecount); + to->di_crtime = xfs_log_dinode_to_disk_ts(from, + from->di_crtime); + to->di_flags2 = cpu_to_be64(from->di_flags2); + to->di_cowextsize = cpu_to_be32(from->di_cowextsize); + to->di_ino = cpu_to_be64(from->di_ino); + to->di_lsn = cpu_to_be64(from->di_lsn); + memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); + uuid_copy(&to->di_uuid, &from->di_uuid); + to->di_flushiter = 0; + } else { + to->di_flushiter = cpu_to_be16(from->di_flushiter); + } +} + STATIC int xlog_recover_inode_commit_pass2( struct xlog *log, diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 6f22a66777cd..bca7659fb5c6 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -404,7 +404,7 @@ xfs_ioc_attr_list( context.cursor.offset)) return -EINVAL; - buffer = kmem_zalloc_large(bufsize, 0); + buffer = kvzalloc(bufsize, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -1190,7 +1190,8 @@ xfs_flags2diflags2( unsigned int xflags) { uint64_t di_flags2 = - (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK); + (ip->i_d.di_flags2 & (XFS_DIFLAG2_REFLINK | + XFS_DIFLAG2_BIGTIME)); if (xflags & FS_XFLAG_DAX) di_flags2 |= XFS_DIFLAG2_DAX; @@ -1690,7 +1691,7 @@ xfs_ioc_getbmap( if (bmx.bmv_count > ULONG_MAX / recsize) return -ENOMEM; - buf = kmem_zalloc_large(bmx.bmv_count * sizeof(*buf), 0); + buf = kvzalloc(bmx.bmv_count * sizeof(*buf), GFP_KERNEL); if (!buf) return -ENOMEM; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index e2ec91b2d0f4..a17d788921d6 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -265,32 +265,6 @@ xlog_header_check_mount( return 0; } -void -xlog_recover_iodone( - struct xfs_buf *bp) -{ - if (bp->b_error) { - /* - * We're not going to bother about retrying - * this during recovery. One strike! - */ - if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) { - xfs_buf_ioerror_alert(bp, __this_address); - xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); - } - } - - /* - * On v5 supers, a bli could be attached to update the metadata LSN. - * Clean it up. - */ - if (bp->b_log_item) - xfs_buf_item_relse(bp); - ASSERT(bp->b_log_item == NULL); - bp->b_flags &= ~_XBF_LOGRECOVERY; - xfs_buf_ioend_finish(bp); -} - /* * This routine finds (to an approximation) the first block in the physical * log which contains the given cycle. It uses a binary search algorithm. @@ -2097,7 +2071,7 @@ xlog_recover_add_to_cont_trans( old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; - ptr = kmem_realloc(old_ptr, len + old_len, 0); + ptr = krealloc(old_ptr, len + old_len, GFP_KERNEL | __GFP_NOFAIL); memcpy(&ptr[old_len], dp, len); item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; @@ -3294,14 +3268,14 @@ xlog_do_log_recovery( */ STATIC int xlog_do_recover( - struct xlog *log, - xfs_daddr_t head_blk, - xfs_daddr_t tail_blk) + struct xlog *log, + xfs_daddr_t head_blk, + xfs_daddr_t tail_blk) { - struct xfs_mount *mp = log->l_mp; - int error; - xfs_buf_t *bp; - xfs_sb_t *sbp; + struct xfs_mount *mp = log->l_mp; + struct xfs_buf *bp = mp->m_sb_bp; + struct xfs_sb *sbp = &mp->m_sb; + int error; trace_xfs_log_recover(log, head_blk, tail_blk); @@ -3315,9 +3289,8 @@ xlog_do_recover( /* * If IO errors happened during recovery, bail out. */ - if (XFS_FORCED_SHUTDOWN(mp)) { + if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - } /* * We now update the tail_lsn since much of the recovery has completed @@ -3331,16 +3304,12 @@ xlog_do_recover( xlog_assign_tail_lsn(mp); /* - * Now that we've finished replaying all buffer and inode - * updates, re-read in the superblock and reverify it. + * Now that we've finished replaying all buffer and inode updates, + * re-read the superblock and reverify it. */ - bp = xfs_getsb(mp); - bp->b_flags &= ~(XBF_DONE | XBF_ASYNC); - ASSERT(!(bp->b_flags & XBF_WRITE)); - bp->b_flags |= XBF_READ; - bp->b_ops = &xfs_sb_buf_ops; - - error = xfs_buf_submit(bp); + xfs_buf_lock(bp); + xfs_buf_hold(bp); + error = _xfs_buf_read(bp, XBF_READ); if (error) { if (!XFS_FORCED_SHUTDOWN(mp)) { xfs_buf_ioerror_alert(bp, __this_address); @@ -3351,7 +3320,6 @@ xlog_do_recover( } /* Convert superblock from on-disk format */ - sbp = &mp->m_sb; xfs_sb_from_disk(sbp, bp->b_addr); xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index c8ae49a1e99c..150ee5cb8645 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -80,9 +80,9 @@ xfs_uuid_mount( } if (hole < 0) { - xfs_uuid_table = kmem_realloc(xfs_uuid_table, + xfs_uuid_table = krealloc(xfs_uuid_table, (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table), - 0); + GFP_KERNEL | __GFP_NOFAIL); hole = xfs_uuid_table_size++; } xfs_uuid_table[hole] = *uuid; @@ -1059,11 +1059,12 @@ xfs_unmountfs( * We can potentially deadlock here if we have an inode cluster * that has been freed has its buffer still pinned in memory because * the transaction is still sitting in a iclog. The stale inodes - * on that buffer will have their flush locks held until the - * transaction hits the disk and the callbacks run. the inode - * flush takes the flush lock unconditionally and with nothing to - * push out the iclog we will never get that unlocked. hence we - * need to force the log first. + * on that buffer will be pinned to the buffer until the + * transaction hits the disk and the callbacks run. Pushing the AIL will + * skip the stale inodes and may never see the pinned buffer, so + * nothing will push out the iclog and unpin the buffer. Hence we + * need to force the log here to ensure all items are flushed into the + * AIL before we go any further. */ xfs_log_force(mp, XFS_LOG_SYNC); @@ -1288,23 +1289,6 @@ xfs_mod_frextents( return ret; } -/* - * xfs_getsb() is called to obtain the buffer for the superblock. - * The buffer is returned locked and read in from disk. - * The buffer should be released with a call to xfs_brelse(). - */ -struct xfs_buf * -xfs_getsb( - struct xfs_mount *mp) -{ - struct xfs_buf *bp = mp->m_sb_bp; - - xfs_buf_lock(bp); - xfs_buf_hold(bp); - ASSERT(bp->b_flags & XBF_DONE); - return bp; -} - /* * Used to free the superblock along various error paths. */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a72cfcaa4ad1..dfa429b77ee2 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -410,7 +410,6 @@ extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, bool reserved); extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); -extern struct xfs_buf *xfs_getsb(xfs_mount_t *); extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); extern bool xfs_fs_writable(struct xfs_mount *mp, int level); diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 5f04d8a5ab2a..0aa87c210104 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -15,6 +15,10 @@ "XFS: offsetof(" #structname ", " #member ") is wrong, " \ "expected " #off) +#define XFS_CHECK_VALUE(value, expected) \ + BUILD_BUG_ON_MSG((value) != (expected), \ + "XFS: value of " #value " is wrong, expected " #expected) + static inline void __init xfs_check_ondisk_structs(void) { @@ -23,7 +27,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_acl_entry, 12); XFS_CHECK_STRUCT_SIZE(struct xfs_agf, 224); XFS_CHECK_STRUCT_SIZE(struct xfs_agfl, 36); - XFS_CHECK_STRUCT_SIZE(struct xfs_agi, 336); + XFS_CHECK_STRUCT_SIZE(struct xfs_agi, 344); XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_key, 8); XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_rec, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_bmdr_block, 4); @@ -41,7 +45,8 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_rec, 12); XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key, 20); XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec, 24); - XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp, 8); + XFS_CHECK_STRUCT_SIZE(xfs_timestamp_t, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_legacy_timestamp, 8); XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t, 8); XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4); XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8); @@ -84,12 +89,12 @@ xfs_check_ondisk_structs(void) XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, namelen, 8); XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, name, 9); XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t, 40); - XFS_CHECK_OFFSET(xfs_attr_shortform_t, hdr.totsize, 0); - XFS_CHECK_OFFSET(xfs_attr_shortform_t, hdr.count, 2); - XFS_CHECK_OFFSET(xfs_attr_shortform_t, list[0].namelen, 4); - XFS_CHECK_OFFSET(xfs_attr_shortform_t, list[0].valuelen, 5); - XFS_CHECK_OFFSET(xfs_attr_shortform_t, list[0].flags, 6); - XFS_CHECK_OFFSET(xfs_attr_shortform_t, list[0].nameval, 7); + XFS_CHECK_OFFSET(struct xfs_attr_shortform, hdr.totsize, 0); + XFS_CHECK_OFFSET(struct xfs_attr_shortform, hdr.count, 2); + XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].namelen, 4); + XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].valuelen, 5); + XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].flags, 6); + XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].nameval, 7); XFS_CHECK_STRUCT_SIZE(xfs_da_blkinfo_t, 12); XFS_CHECK_STRUCT_SIZE(xfs_da_intnode_t, 16); XFS_CHECK_STRUCT_SIZE(xfs_da_node_entry_t, 8); @@ -121,7 +126,8 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176); XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log, 28); - XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp, 8); + XFS_CHECK_STRUCT_SIZE(xfs_ictimestamp_t, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_legacy_ictimestamp, 8); XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32, 52); XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56); XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20); @@ -152,6 +158,20 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24); XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64); XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64); + + /* + * Make sure the incore inode timestamp range corresponds to hand + * converted values based on the ondisk format specification. + */ + XFS_CHECK_VALUE(XFS_BIGTIME_TIME_MIN - XFS_BIGTIME_EPOCH_OFFSET, + XFS_LEGACY_TIME_MIN); + XFS_CHECK_VALUE(XFS_BIGTIME_TIME_MAX - XFS_BIGTIME_EPOCH_OFFSET, + 16299260424LL); + + /* Do the same with the incore quota expiration range. */ + XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MIN << XFS_DQ_BIGTIME_SHIFT, 4); + XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MAX << XFS_DQ_BIGTIME_SHIFT, + 16299260424LL); } #endif /* __XFS_ONDISK_H */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index be67570badf8..3f82e0c92c2d 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -661,6 +661,17 @@ xfs_qm_init_quotainfo( /* Precalc some constants */ qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen); + if (xfs_sb_version_hasbigtime(&mp->m_sb)) { + qinf->qi_expiry_min = + xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MIN); + qinf->qi_expiry_max = + xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MAX); + } else { + qinf->qi_expiry_min = XFS_DQ_LEGACY_EXPIRY_MIN; + qinf->qi_expiry_max = XFS_DQ_LEGACY_EXPIRY_MAX; + } + trace_xfs_quota_expiry_range(mp, qinf->qi_expiry_min, + qinf->qi_expiry_max); mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); @@ -879,6 +890,8 @@ xfs_qm_reset_dqcounts( ddq->d_bwarns = 0; ddq->d_iwarns = 0; ddq->d_rtbwarns = 0; + if (xfs_sb_version_hasbigtime(&mp->m_sb)) + ddq->d_type |= XFS_DQTYPE_BIGTIME; } if (xfs_sb_version_hascrc(&mp->m_sb)) { diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 9c078c35d924..e3dabab44097 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -65,6 +65,10 @@ struct xfs_quotainfo { struct xfs_def_quota qi_grp_default; struct xfs_def_quota qi_prj_default; struct shrinker qi_shrinker; + + /* Minimum and maximum quota expiration timestamp values. */ + time64_t qi_expiry_min; + time64_t qi_expiry_max; }; static inline struct radix_tree_root * diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 1c542b4a5220..ca1b57d291dc 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -479,13 +479,19 @@ xfs_setqlim_warns( static inline void xfs_setqlim_timer( + struct xfs_mount *mp, struct xfs_dquot_res *res, struct xfs_quota_limits *qlim, s64 timer) { - res->timer = timer; - if (qlim) - qlim->time = timer; + if (qlim) { + /* Set the length of the default grace period. */ + res->timer = xfs_dquot_set_grace_period(timer); + qlim->time = res->timer; + } else { + /* Set the grace period expiration on a quota. */ + res->timer = xfs_dquot_set_timeout(mp, timer); + } } /* @@ -574,7 +580,7 @@ xfs_qm_scall_setqlim( if (newlim->d_fieldmask & QC_SPC_WARNS) xfs_setqlim_warns(res, qlim, newlim->d_spc_warns); if (newlim->d_fieldmask & QC_SPC_TIMER) - xfs_setqlim_timer(res, qlim, newlim->d_spc_timer); + xfs_setqlim_timer(mp, res, qlim, newlim->d_spc_timer); /* Blocks on the realtime device. */ hard = (newlim->d_fieldmask & QC_RT_SPC_HARD) ? @@ -590,7 +596,7 @@ xfs_qm_scall_setqlim( if (newlim->d_fieldmask & QC_RT_SPC_WARNS) xfs_setqlim_warns(res, qlim, newlim->d_rt_spc_warns); if (newlim->d_fieldmask & QC_RT_SPC_TIMER) - xfs_setqlim_timer(res, qlim, newlim->d_rt_spc_timer); + xfs_setqlim_timer(mp, res, qlim, newlim->d_rt_spc_timer); /* Inodes */ hard = (newlim->d_fieldmask & QC_INO_HARD) ? @@ -606,7 +612,7 @@ xfs_qm_scall_setqlim( if (newlim->d_fieldmask & QC_INO_WARNS) xfs_setqlim_warns(res, qlim, newlim->d_ino_warns); if (newlim->d_fieldmask & QC_INO_TIMER) - xfs_setqlim_timer(res, qlim, newlim->d_ino_timer); + xfs_setqlim_timer(mp, res, qlim, newlim->d_ino_timer); if (id != 0) { /* diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 06b22e35fc90..5a62398940d0 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -108,8 +108,6 @@ extern void xfs_qm_mount_quotas(struct xfs_mount *); extern void xfs_qm_unmount(struct xfs_mount *); extern void xfs_qm_unmount_quotas(struct xfs_mount *); -void xfs_dquot_done(struct xfs_buf *); - #else static inline int xfs_qm_vop_dqalloc(struct xfs_inode *ip, kuid_t kuid, kgid_t kgid, @@ -151,12 +149,6 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, #define xfs_qm_mount_quotas(mp) #define xfs_qm_unmount(mp) #define xfs_qm_unmount_quotas(mp) - -static inline void xfs_dquot_done(struct xfs_buf *bp) -{ - return; -} - #endif /* CONFIG_XFS_QUOTA */ #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 6209e7b6b895..5b89c12f1566 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -247,6 +247,9 @@ xfs_rtallocate_extent_block( end = XFS_BLOCKTOBIT(mp, bbno + 1) - 1; i <= end; i++) { + /* Make sure we don't scan off the end of the rt volume. */ + maxlen = min(mp->m_sb.sb_rextents, i + maxlen) - i; + /* * See if there's a free extent of maxlen starting at i. * If it's not so then next will contain the first non-free. @@ -442,6 +445,14 @@ xfs_rtallocate_extent_near( */ if (bno >= mp->m_sb.sb_rextents) bno = mp->m_sb.sb_rextents - 1; + + /* Make sure we don't run off the end of the rt volume. */ + maxlen = min(mp->m_sb.sb_rextents, bno + maxlen) - bno; + if (maxlen < minlen) { + *rtblock = NULLRTBLOCK; + return 0; + } + /* * Try the exact allocation first. */ @@ -862,7 +873,7 @@ xfs_alloc_rsum_cache( * lower bound on the minimum level with any free extents. We can * continue without the cache if it couldn't be allocated. */ - mp->m_rsum_cache = kmem_zalloc_large(rbmblocks, 0); + mp->m_rsum_cache = kvzalloc(rbmblocks, GFP_KERNEL); if (!mp->m_rsum_cache) xfs_warn(mp, "could not allocate realtime summary cache"); } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 71ac6c1cdc36..baf5de30eebb 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -654,11 +654,11 @@ xfs_fs_destroy_inode( ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); /* - * We always use background reclaim here because even if the - * inode is clean, it still may be under IO and hence we have - * to take the flush lock. The background reclaim path handles - * this more efficiently than we can here, so simply let background - * reclaim tear down all inodes. + * We always use background reclaim here because even if the inode is + * clean, it still may be under IO and hence we have wait for IO + * completion to occur before we can reclaim the inode. The background + * reclaim path handles this more efficiently than we can here, so + * simply let background reclaim tear down all inodes. */ xfs_inode_set_reclaim_tag(ip); } @@ -1484,8 +1484,14 @@ xfs_fc_fill_super( sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_max_links = XFS_MAXLINK; sb->s_time_gran = 1; - sb->s_time_min = S32_MIN; - sb->s_time_max = S32_MAX; + if (xfs_sb_version_hasbigtime(&mp->m_sb)) { + sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN); + sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX); + } else { + sb->s_time_min = XFS_LEGACY_TIME_MIN; + sb->s_time_max = XFS_LEGACY_TIME_MAX; + } + trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max); sb->s_iflags |= SB_I_CGROUPWB; set_posix_acl_flag(sb); @@ -1494,6 +1500,10 @@ xfs_fc_fill_super( if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) sb->s_flags |= SB_I_VERSION; + if (xfs_sb_version_hasbigtime(&mp->m_sb)) + xfs_warn(mp, + "EXPERIMENTAL big timestamp feature in use. Use at your own risk!"); + if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) { bool rtdev_is_dax = false, datadev_is_dax; @@ -1549,6 +1559,10 @@ xfs_fc_fill_super( goto out_filestream_unmount; } + if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) + xfs_warn(mp, + "EXPERIMENTAL inode btree counters feature in use. Use at your own risk!"); + error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index abb1d859f226..dcdcf99cfa5d 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -338,7 +338,7 @@ DEFINE_BUF_EVENT(xfs_buf_delwri_split); DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf); DEFINE_BUF_EVENT(xfs_buf_get_uncached); DEFINE_BUF_EVENT(xfs_buf_item_relse); -DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); +DEFINE_BUF_EVENT(xfs_buf_iodone_async); DEFINE_BUF_EVENT(xfs_buf_error_relse); DEFINE_BUF_EVENT(xfs_buf_wait_buftarg); DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); @@ -3676,7 +3676,6 @@ DEFINE_EVENT(xfs_kmem_class, name, \ DEFINE_KMEM_EVENT(kmem_alloc); DEFINE_KMEM_EVENT(kmem_alloc_io); DEFINE_KMEM_EVENT(kmem_alloc_large); -DEFINE_KMEM_EVENT(kmem_realloc); TRACE_EVENT(xfs_check_new_dalign, TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino), @@ -3844,6 +3843,32 @@ TRACE_EVENT(xfs_btree_bload_block, __entry->nr_records) ) +DECLARE_EVENT_CLASS(xfs_timestamp_range_class, + TP_PROTO(struct xfs_mount *mp, time64_t min, time64_t max), + TP_ARGS(mp, min, max), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(long long, min) + __field(long long, max) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->min = min; + __entry->max = max; + ), + TP_printk("dev %d:%d min %lld max %lld", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->min, + __entry->max) +) + +#define DEFINE_TIMESTAMP_RANGE_EVENT(name) \ +DEFINE_EVENT(xfs_timestamp_range_class, name, \ + TP_PROTO(struct xfs_mount *mp, long long min, long long max), \ + TP_ARGS(mp, min, max)) +DEFINE_TIMESTAMP_RANGE_EVENT(xfs_inode_timestamp_range); +DEFINE_TIMESTAMP_RANGE_EVENT(xfs_quota_expiry_range); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index ed72867b1a19..ca18a040336a 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -468,7 +468,7 @@ xfs_trans_apply_sb_deltas( xfs_buf_t *bp; int whole = 0; - bp = xfs_trans_getsb(tp, tp->t_mountp); + bp = xfs_trans_getsb(tp); sbp = bp->b_addr; /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index b752501818d2..f46534b75236 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -209,7 +209,7 @@ xfs_trans_read_buf( flags, bpp, ops); } -struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *); +struct xfs_buf *xfs_trans_getsb(struct xfs_trans *); void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 11cd666cd99a..42d63b830cb9 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -166,50 +166,34 @@ xfs_trans_get_buf_map( } /* - * Get and lock the superblock buffer of this file system for the - * given transaction. - * - * We don't need to use incore_match() here, because the superblock - * buffer is a private buffer which we keep a pointer to in the - * mount structure. + * Get and lock the superblock buffer for the given transaction. */ -xfs_buf_t * +struct xfs_buf * xfs_trans_getsb( - xfs_trans_t *tp, - struct xfs_mount *mp) + struct xfs_trans *tp) { - xfs_buf_t *bp; - struct xfs_buf_log_item *bip; + struct xfs_buf *bp = tp->t_mountp->m_sb_bp; /* - * Default to just trying to lock the superblock buffer - * if tp is NULL. + * Just increment the lock recursion count if the buffer is already + * attached to this transaction. */ - if (tp == NULL) - return xfs_getsb(mp); - - /* - * If the superblock buffer already has this transaction - * pointer in its b_fsprivate2 field, then we know we already - * have it locked. In this case we just increment the lock - * recursion count and return the buffer to the caller. - */ - bp = mp->m_sb_bp; if (bp->b_transp == tp) { - bip = bp->b_log_item; + struct xfs_buf_log_item *bip = bp->b_log_item; + ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; + trace_xfs_trans_getsb_recur(bip); - return bp; + } else { + xfs_buf_lock(bp); + xfs_buf_hold(bp); + _xfs_trans_bjoin(tp, bp, 1); + + trace_xfs_trans_getsb(bp->b_log_item); } - bp = xfs_getsb(mp); - if (bp == NULL) - return NULL; - - _xfs_trans_bjoin(tp, bp, 1); - trace_xfs_trans_getsb(bp->b_log_item); return bp; } diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index c6ba7ef18e06..133fc6fc3edd 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -55,6 +55,12 @@ xfs_trans_log_dquot( { ASSERT(XFS_DQ_IS_LOCKED(dqp)); + /* Upgrade the dquot to bigtime format if possible. */ + if (dqp->q_id != 0 && + xfs_sb_version_hasbigtime(&tp->t_mountp->m_sb) && + !(dqp->q_type & XFS_DQTYPE_BIGTIME)) + dqp->q_type |= XFS_DQTYPE_BIGTIME; + tp->t_flags |= XFS_TRANS_DIRTY; set_bit(XFS_LI_DIRTY, &dqp->q_logitem.qli_item.li_flags); }